You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by gs...@apache.org on 2008/12/14 04:47:42 UTC
svn commit: r726350 - in /lucene/solr/trunk/contrib/extraction/src:
main/java/org/apache/solr/handler/extraction/SolrContentHandler.java
test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java
test/resources/solr/conf/schema.xml
Author: gsingers
Date: Sat Dec 13 19:47:42 2008
New Revision: 726350
URL: http://svn.apache.org/viewvc?rev=726350&view=rev
Log:
SOLR-284: handle multivalued literals
Modified:
lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java
lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java
lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml
Modified: lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java?rev=726350&r1=726349&r2=726350&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java (original)
+++ lucene/solr/trunk/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java Sat Dec 13 19:47:42 2008
@@ -34,13 +34,12 @@
* The class responsible for handling Tika events and translating them into {@link org.apache.solr.common.SolrInputDocument}s.
* <B>This class is not thread-safe.</B>
* <p/>
- *
+ * <p/>
* User's may wish to override this class to provide their own functionality.
*
* @see org.apache.solr.handler.extraction.SolrContentHandlerFactory
* @see org.apache.solr.handler.extraction.ExtractingRequestHandler
* @see org.apache.solr.handler.extraction.ExtractingDocumentLoader
- *
*/
public class SolrContentHandler extends DefaultHandler implements ExtractingParams {
private transient static Logger log = LoggerFactory.getLogger(SolrContentHandler.class);
@@ -151,10 +150,16 @@
//no need to map names here, since they are literals from the user
SchemaField schFld = schema.getFieldOrNull(fieldName);
if (schFld != null) {
- String value = params.get(name);
+ String[] values = params.getParams(name);
+ if (schFld.multiValued() == false && values.length > 1) {
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The Field " + fieldName + " is not multivalued");
+ }
boost = getBoost(fieldName);
- //no need to transform here, b/c we can assume the user sent it in correctly
- document.addField(fieldName, value, boost);
+ for (int i = 0; i < values.length; i++) {
+ //no need to transform here, b/c we can assume the user sent it in correctly
+ document.addField(fieldName, values[i], boost);
+
+ }
} else {
handleUndeclaredField(fieldName);
}
@@ -219,10 +224,9 @@
//last chance, just create one
uniqId = UUID.randomUUID().toString();
}
- } else if (type instanceof UUIDField){
+ } else if (type instanceof UUIDField) {
uniqId = UUID.randomUUID().toString();
- }
- else {
+ } else {
uniqId = String.valueOf(getNextId());
}
return uniqId;
@@ -294,8 +298,6 @@
}
-
-
/**
* Can be used to transform input values based on their {@link org.apache.solr.schema.SchemaField}
* <p/>
@@ -354,7 +356,7 @@
}
- protected synchronized long getNextId(){
+ protected synchronized long getNextId() {
return identifier++;
}
Modified: lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java?rev=726350&r1=726349&r2=726350&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java (original)
+++ lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java Sat Dec 13 19:47:42 2008
@@ -6,6 +6,7 @@
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.SolrException;
import org.apache.solr.handler.extraction.ExtractingParams;
import org.apache.solr.handler.extraction.ExtractingRequestHandler;
@@ -19,8 +20,15 @@
*
**/
public class ExtractingRequestHandlerTest extends AbstractSolrTestCase {
- @Override public String getSchemaFile() { return "schema.xml"; }
- @Override public String getSolrConfigFile() { return "solrconfig.xml"; }
+ @Override
+ public String getSchemaFile() {
+ return "schema.xml";
+ }
+
+ @Override
+ public String getSolrConfigFile() {
+ return "solrconfig.xml";
+ }
public void testExtraction() throws Exception {
@@ -32,9 +40,9 @@
"ext.def.fl", "extractedContent",
"ext.map.Last-Modified", "extractedDate"
);
- assertQ(req("title:solr-word"),"//*[@numFound='0']");
+ assertQ(req("title:solr-word"), "//*[@numFound='0']");
assertU(commit());
- assertQ(req("title:solr-word"),"//*[@numFound='1']");
+ assertQ(req("title:solr-word"), "//*[@numFound='1']");
loadLocal("simple.html", "ext.map.created", "extractedDate", "ext.map.producer", "extractedProducer",
"ext.map.creator", "extractedCreator", "ext.map.Keywords", "extractedKeywords",
@@ -43,9 +51,9 @@
"ext.def.fl", "extractedContent",
"ext.map.Last-Modified", "extractedDate"
);
- assertQ(req("title:Welcome"),"//*[@numFound='0']");
+ assertQ(req("title:Welcome"), "//*[@numFound='0']");
assertU(commit());
- assertQ(req("title:Welcome"),"//*[@numFound='1']");
+ assertQ(req("title:Welcome"), "//*[@numFound='1']");
loadLocal("version_control.xml", "ext.map.created", "extractedDate", "ext.map.producer", "extractedProducer",
"ext.map.creator", "extractedCreator", "ext.map.Keywords", "extractedKeywords",
@@ -53,13 +61,60 @@
"ext.def.fl", "extractedContent",
"ext.map.Last-Modified", "extractedDate"
);
- assertQ(req("stream_name:version_control.xml"),"//*[@numFound='0']");
+ assertQ(req("stream_name:version_control.xml"), "//*[@numFound='0']");
assertU(commit());
- assertQ(req("stream_name:version_control.xml"),"//*[@numFound='1']");
+ assertQ(req("stream_name:version_control.xml"), "//*[@numFound='1']");
+
+
}
-
+ public void testLiterals() throws Exception {
+ ExtractingRequestHandler handler = (ExtractingRequestHandler) h.getCore().getRequestHandler("/update/extract");
+ assertTrue("handler is null and it shouldn't be", handler != null);
+ //case 1: two literal values sent for the multiValued field extractionLiteralMV; both should be searchable
+ loadLocal("version_control.xml", "ext.map.created", "extractedDate", "ext.map.producer", "extractedProducer",
+ "ext.map.creator", "extractedCreator", "ext.map.Keywords", "extractedKeywords",
+ "ext.map.Author", "extractedAuthor",
+ "ext.def.fl", "extractedContent",
+ "ext.literal.extractionLiteralMV", "one",
+ "ext.literal.extractionLiteralMV", "two",
+ "ext.map.Last-Modified", "extractedDate"
+
+ );
+ assertQ(req("stream_name:version_control.xml"), "//*[@numFound='0']");
+ assertU(commit());
+ assertQ(req("stream_name:version_control.xml"), "//*[@numFound='1']");
+
+ assertQ(req("extractionLiteralMV:one"), "//*[@numFound='1']");
+ assertQ(req("extractionLiteralMV:two"), "//*[@numFound='1']");
+
+ try {
+ loadLocal("version_control.xml", "ext.map.created", "extractedDate", "ext.map.producer", "extractedProducer",
+ "ext.map.creator", "extractedCreator", "ext.map.Keywords", "extractedKeywords",
+ "ext.map.Author", "extractedAuthor",
+ "ext.def.fl", "extractedContent",
+ "ext.literal.extractionLiteral", "one",
+ "ext.literal.extractionLiteral", "two",
+ "ext.map.Last-Modified", "extractedDate"
+ );
+ assertTrue("Exception should have been thrown", false); //reached only if the two-valued literal was wrongly accepted
+ } catch (SolrException e) {
+ //case 2, expected: extractionLiteral is declared multiValued="false", so two literal values must be rejected
+ }
+
+ loadLocal("version_control.xml", "ext.map.created", "extractedDate", "ext.map.producer", "extractedProducer",
+ "ext.map.creator", "extractedCreator", "ext.map.Keywords", "extractedKeywords",
+ "ext.map.Author", "extractedAuthor",
+ "ext.def.fl", "extractedContent",
+ "ext.literal.extractionLiteral", "one",
+ "ext.map.Last-Modified", "extractedDate"
+ );
+ assertU(commit());
+ assertQ(req("extractionLiteral:one"), "//*[@numFound='1']"); //case 3: a single literal value is accepted for the single-valued field
+
+ }
+
public void testPlainTextSpecifyingMimeType() throws Exception {
ExtractingRequestHandler handler = (ExtractingRequestHandler) h.getCore().getRequestHandler("/update/extract");
@@ -71,11 +126,11 @@
"ext.map.Author", "extractedAuthor",
"ext.map.language", "extractedLanguage",
"ext.def.fl", "extractedContent",
- ExtractingParams.STREAM_TYPE, "text/plain"
+ ExtractingParams.STREAM_TYPE, "text/plain"
);
- assertQ(req("extractedContent:Apache"),"//*[@numFound='0']");
+ assertQ(req("extractedContent:Apache"), "//*[@numFound='0']");
assertU(commit());
- assertQ(req("extractedContent:Apache"),"//*[@numFound='1']");
+ assertQ(req("extractedContent:Apache"), "//*[@numFound='1']");
}
public void testPlainTextSpecifyingResourceName() throws Exception {
@@ -88,11 +143,11 @@
"ext.map.Author", "extractedAuthor",
"ext.map.language", "extractedLanguage",
"ext.def.fl", "extractedContent",
- ExtractingParams.RESOURCE_NAME, "version_control.txt"
+ ExtractingParams.RESOURCE_NAME, "version_control.txt"
);
- assertQ(req("extractedContent:Apache"),"//*[@numFound='0']");
+ assertQ(req("extractedContent:Apache"), "//*[@numFound='0']");
assertU(commit());
- assertQ(req("extractedContent:Apache"),"//*[@numFound='1']");
+ assertQ(req("extractedContent:Apache"), "//*[@numFound='1']");
}
// Note: If you load a plain text file specifying neither MIME type nor filename, extraction will silently fail. This is because Tika's
@@ -128,7 +183,7 @@
SolrQueryResponse loadLocal(String filename, String... args) throws Exception {
- LocalSolrQueryRequest req = (LocalSolrQueryRequest)req(args);
+ LocalSolrQueryRequest req = (LocalSolrQueryRequest) req(args);
// TODO: stop using locally defined streams once stream.file and
// stream.body work everywhere
Modified: lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml?rev=726350&r1=726349&r2=726350&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml (original)
+++ lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml Sat Dec 13 19:47:42 2008
@@ -402,6 +402,9 @@
<field name="extractedLanguage" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="resourceName" type="string" indexed="true" stored="true" multiValued="true"/>
+ <field name="extractionLiteralMV" type="string" indexed="true" stored="true" multiValued="true"/>
+ <field name="extractionLiteral" type="string" indexed="true" stored="true" multiValued="false"/>
+
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns.