You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by gs...@apache.org on 2008/12/14 04:47:42 UTC

svn commit: r726350 - in /lucene/solr/trunk/contrib/extraction/src: main/java/org/apache/solr/handler/extraction/SolrContentHandler.java test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java test/resources/solr/conf/schema.xml

Author: gsingers
Date: Sat Dec 13 19:47:42 2008
New Revision: 726350

URL: http://svn.apache.org/viewvc?rev=726350&view=rev
Log:
SOLR-284: handle multivalued literals

Modified:
    lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java
    lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java
    lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml

Modified: lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java?rev=726350&r1=726349&r2=726350&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java (original)
+++ lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java Sat Dec 13 19:47:42 2008
@@ -34,13 +34,12 @@
  * The class responsible for handling Tika events and translating them into {@link org.apache.solr.common.SolrInputDocument}s.
  * <B>This class is not thread-safe.</B>
  * <p/>
- *
+ * <p/>
  * Users may wish to override this class to provide their own functionality.
  *
  * @see org.apache.solr.handler.extraction.SolrContentHandlerFactory
  * @see org.apache.solr.handler.extraction.ExtractingRequestHandler
  * @see org.apache.solr.handler.extraction.ExtractingDocumentLoader
- *
  */
 public class SolrContentHandler extends DefaultHandler implements ExtractingParams {
   private transient static Logger log = LoggerFactory.getLogger(SolrContentHandler.class);
@@ -151,10 +150,16 @@
         //no need to map names here, since they are literals from the user
         SchemaField schFld = schema.getFieldOrNull(fieldName);
         if (schFld != null) {
-          String value = params.get(name);
+          String[] values = params.getParams(name);
+          if (schFld.multiValued() == false && values.length > 1) {
+            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The Field " + fieldName + " is not multivalued");
+          }
           boost = getBoost(fieldName);
-          //no need to transform here, b/c we can assume the user sent it in correctly
-          document.addField(fieldName, value, boost);
+          for (int i = 0; i < values.length; i++) {
+            //no need to transform here, b/c we can assume the user sent it in correctly
+            document.addField(fieldName, values[i], boost);
+
+          }
         } else {
           handleUndeclaredField(fieldName);
         }
@@ -219,10 +224,9 @@
         //last chance, just create one
         uniqId = UUID.randomUUID().toString();
       }
-    } else if (type instanceof UUIDField){
+    } else if (type instanceof UUIDField) {
       uniqId = UUID.randomUUID().toString();
-    }
-    else {
+    } else {
       uniqId = String.valueOf(getNextId());
     }
     return uniqId;
@@ -294,8 +298,6 @@
   }
 
 
-  
-
   /**
    * Can be used to transform input values based on their {@link org.apache.solr.schema.SchemaField}
    * <p/>
@@ -354,7 +356,7 @@
   }
 
 
-  protected synchronized long getNextId(){
+  protected synchronized long getNextId() {
     return identifier++;
   }
 

Modified: lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java?rev=726350&r1=726349&r2=726350&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java (original)
+++ lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java Sat Dec 13 19:47:42 2008
@@ -6,6 +6,7 @@
 import org.apache.solr.common.util.ContentStream;
 import org.apache.solr.common.util.ContentStreamBase;
 import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.SolrException;
 import org.apache.solr.handler.extraction.ExtractingParams;
 import org.apache.solr.handler.extraction.ExtractingRequestHandler;
 
@@ -19,8 +20,15 @@
  *
  **/
 public class ExtractingRequestHandlerTest extends AbstractSolrTestCase {
-  @Override public String getSchemaFile() { return "schema.xml"; }
-  @Override public String getSolrConfigFile() { return "solrconfig.xml"; }
+  @Override
+  public String getSchemaFile() {
+    return "schema.xml";
+  }
+
+  @Override
+  public String getSolrConfigFile() {
+    return "solrconfig.xml";
+  }
 
 
   public void testExtraction() throws Exception {
@@ -32,9 +40,9 @@
             "ext.def.fl", "extractedContent",
             "ext.map.Last-Modified", "extractedDate"
     );
-    assertQ(req("title:solr-word"),"//*[@numFound='0']");
+    assertQ(req("title:solr-word"), "//*[@numFound='0']");
     assertU(commit());
-    assertQ(req("title:solr-word"),"//*[@numFound='1']");
+    assertQ(req("title:solr-word"), "//*[@numFound='1']");
 
     loadLocal("simple.html", "ext.map.created", "extractedDate", "ext.map.producer", "extractedProducer",
             "ext.map.creator", "extractedCreator", "ext.map.Keywords", "extractedKeywords",
@@ -43,9 +51,9 @@
             "ext.def.fl", "extractedContent",
             "ext.map.Last-Modified", "extractedDate"
     );
-    assertQ(req("title:Welcome"),"//*[@numFound='0']");
+    assertQ(req("title:Welcome"), "//*[@numFound='0']");
     assertU(commit());
-    assertQ(req("title:Welcome"),"//*[@numFound='1']");
+    assertQ(req("title:Welcome"), "//*[@numFound='1']");
 
     loadLocal("version_control.xml", "ext.map.created", "extractedDate", "ext.map.producer", "extractedProducer",
             "ext.map.creator", "extractedCreator", "ext.map.Keywords", "extractedKeywords",
@@ -53,13 +61,60 @@
             "ext.def.fl", "extractedContent",
             "ext.map.Last-Modified", "extractedDate"
     );
-    assertQ(req("stream_name:version_control.xml"),"//*[@numFound='0']");
+    assertQ(req("stream_name:version_control.xml"), "//*[@numFound='0']");
     assertU(commit());
-    assertQ(req("stream_name:version_control.xml"),"//*[@numFound='1']");
+    assertQ(req("stream_name:version_control.xml"), "//*[@numFound='1']");
+
+
   }
 
 
-  
+  public void testLiterals() throws Exception {
+    ExtractingRequestHandler handler = (ExtractingRequestHandler) h.getCore().getRequestHandler("/update/extract");
+    assertTrue("handler is null and it shouldn't be", handler != null);
+    //test literal
+    loadLocal("version_control.xml", "ext.map.created", "extractedDate", "ext.map.producer", "extractedProducer",
+            "ext.map.creator", "extractedCreator", "ext.map.Keywords", "extractedKeywords",
+            "ext.map.Author", "extractedAuthor",
+            "ext.def.fl", "extractedContent",
+            "ext.literal.extractionLiteralMV", "one",
+            "ext.literal.extractionLiteralMV", "two",
+            "ext.map.Last-Modified", "extractedDate"
+
+    );
+    assertQ(req("stream_name:version_control.xml"), "//*[@numFound='0']");
+    assertU(commit());
+    assertQ(req("stream_name:version_control.xml"), "//*[@numFound='1']");
+
+    assertQ(req("extractionLiteralMV:one"), "//*[@numFound='1']");
+    assertQ(req("extractionLiteralMV:two"), "//*[@numFound='1']");
+
+    try {
+      loadLocal("version_control.xml", "ext.map.created", "extractedDate", "ext.map.producer", "extractedProducer",
+              "ext.map.creator", "extractedCreator", "ext.map.Keywords", "extractedKeywords",
+              "ext.map.Author", "extractedAuthor",
+              "ext.def.fl", "extractedContent",
+              "ext.literal.extractionLiteral", "one",
+              "ext.literal.extractionLiteral", "two",
+              "ext.map.Last-Modified", "extractedDate"
+      );
+      assertTrue("Exception should have been thrown", false);
+    } catch (SolrException e) {
+      //nothing to see here, move along
+    }
+
+    loadLocal("version_control.xml", "ext.map.created", "extractedDate", "ext.map.producer", "extractedProducer",
+            "ext.map.creator", "extractedCreator", "ext.map.Keywords", "extractedKeywords",
+            "ext.map.Author", "extractedAuthor",
+            "ext.def.fl", "extractedContent",
+            "ext.literal.extractionLiteral", "one",
+            "ext.map.Last-Modified", "extractedDate"
+    );
+    assertU(commit());
+    assertQ(req("extractionLiteral:one"), "//*[@numFound='1']");
+
+  }
+
 
   public void testPlainTextSpecifyingMimeType() throws Exception {
     ExtractingRequestHandler handler = (ExtractingRequestHandler) h.getCore().getRequestHandler("/update/extract");
@@ -71,11 +126,11 @@
             "ext.map.Author", "extractedAuthor",
             "ext.map.language", "extractedLanguage",
             "ext.def.fl", "extractedContent",
-	    ExtractingParams.STREAM_TYPE, "text/plain"
+            ExtractingParams.STREAM_TYPE, "text/plain"
     );
-    assertQ(req("extractedContent:Apache"),"//*[@numFound='0']");
+    assertQ(req("extractedContent:Apache"), "//*[@numFound='0']");
     assertU(commit());
-    assertQ(req("extractedContent:Apache"),"//*[@numFound='1']");
+    assertQ(req("extractedContent:Apache"), "//*[@numFound='1']");
   }
 
   public void testPlainTextSpecifyingResourceName() throws Exception {
@@ -88,11 +143,11 @@
             "ext.map.Author", "extractedAuthor",
             "ext.map.language", "extractedLanguage",
             "ext.def.fl", "extractedContent",
-	    ExtractingParams.RESOURCE_NAME, "version_control.txt"
+            ExtractingParams.RESOURCE_NAME, "version_control.txt"
     );
-    assertQ(req("extractedContent:Apache"),"//*[@numFound='0']");
+    assertQ(req("extractedContent:Apache"), "//*[@numFound='0']");
     assertU(commit());
-    assertQ(req("extractedContent:Apache"),"//*[@numFound='1']");
+    assertQ(req("extractedContent:Apache"), "//*[@numFound='1']");
   }
 
   // Note: If you load a plain text file specifying neither MIME type nor filename, extraction will silently fail. This is because Tika's
@@ -128,7 +183,7 @@
 
 
   SolrQueryResponse loadLocal(String filename, String... args) throws Exception {
-    LocalSolrQueryRequest req =  (LocalSolrQueryRequest)req(args);
+    LocalSolrQueryRequest req = (LocalSolrQueryRequest) req(args);
 
     // TODO: stop using locally defined streams once stream.file and
     // stream.body work everywhere

Modified: lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml?rev=726350&r1=726349&r2=726350&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml (original)
+++ lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml Sat Dec 13 19:47:42 2008
@@ -402,6 +402,9 @@
    <field name="extractedLanguage" type="string" indexed="true" stored="true" multiValued="true"/>
    <field name="resourceName" type="string" indexed="true" stored="true" multiValued="true"/>
 
+   <field name="extractionLiteralMV" type="string" indexed="true" stored="true" multiValued="true"/>
+   <field name="extractionLiteral" type="string" indexed="true" stored="true" multiValued="false"/>
+
    
    <!-- Dynamic field definitions.  If a field name is not found, dynamicFields
         will be used if the name matches any of the patterns.