You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2013/04/05 11:48:28 UTC
svn commit: r1464902 - in /lucene/dev/branches/branch_4x: ./ solr/
solr/core/ solr/core/src/java/org/apache/solr/schema/
solr/core/src/java/org/apache/solr/update/
solr/core/src/java/org/apache/solr/update/processor/
solr/core/src/test-files/solr/colle...
Author: ab
Date: Fri Apr 5 09:48:27 2013
New Revision: 1464902
URL: http://svn.apache.org/r1464902
Log:
SOLR-4648 Add PreAnalyzedUpdateProcessorFactory.
Added:
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorFactory.java
- copied, changed from r1464889, lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorFactory.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorTest.java
- copied unchanged from r1464889, lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorTest.java
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/solr/ (props changed)
lucene/dev/branches/branch_4x/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/solr/core/ (props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/schema/PreAnalyzedFieldTest.java
Modified: lucene/dev/branches/branch_4x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/CHANGES.txt?rev=1464902&r1=1464901&r2=1464902&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/solr/CHANGES.txt Fri Apr 5 09:48:27 2013
@@ -62,6 +62,10 @@ Detailed Change List
New Features
----------------------
+* SOLR-4648 PreAnalyzedUpdateProcessorFactory allows using the functionality
+ of PreAnalyzedField with other field types. See javadoc for details and
+ examples. (Andrzej Bialecki)
+
* SOLR-4196 (and others). Solr.xml is being deprecated in favor of a simple
properties file. In the absence of a <solr_home>/solr.xml but the presence of
<solr_home>/solr.properties, two things will happen
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java?rev=1464902&r1=1464901&r2=1464902&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java Fri Apr 5 09:48:27 2013
@@ -28,6 +28,7 @@ import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.document.Field;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.AttributeSource;
@@ -55,23 +56,27 @@ public class PreAnalyzedField extends Fi
private PreAnalyzedParser parser;
@Override
- protected void init(IndexSchema schema, Map<String, String> args) {
+ public void init(IndexSchema schema, Map<String, String> args) {
super.init(schema, args);
String implName = args.get(PARSER_IMPL);
if (implName == null) {
parser = new JsonPreAnalyzedParser();
} else {
- try {
- Class<?> implClazz = Class.forName(implName);
- if (!PreAnalyzedParser.class.isAssignableFrom(implClazz)) {
- throw new Exception("must implement " + PreAnalyzedParser.class.getName());
- }
- Constructor<?> c = implClazz.getConstructor(new Class<?>[0]);
- parser = (PreAnalyzedParser) c.newInstance(new Object[0]);
- } catch (Exception e) {
- LOG.warn("Can't use the configured PreAnalyzedParser class '" + implName + "' (" +
- e.getMessage() + "), using default " + DEFAULT_IMPL);
+ // short name
+ if ("json".equalsIgnoreCase(implName)) {
parser = new JsonPreAnalyzedParser();
+ } else if ("simple".equalsIgnoreCase(implName)) {
+ parser = new SimplePreAnalyzedParser();
+ } else {
+ try {
+ Class<? extends PreAnalyzedParser> implClazz = schema.getResourceLoader().findClass(implName, PreAnalyzedParser.class);
+ Constructor<?> c = implClazz.getConstructor(new Class<?>[0]);
+ parser = (PreAnalyzedParser) c.newInstance(new Object[0]);
+ } catch (Exception e) {
+ LOG.warn("Can't use the configured PreAnalyzedParser class '" + implName +
+ "', using default " + DEFAULT_IMPL, e);
+ parser = new JsonPreAnalyzedParser();
+ }
}
}
}
@@ -100,7 +105,7 @@ public class PreAnalyzedField extends Fi
try {
f = fromString(field, String.valueOf(value), boost);
} catch (Exception e) {
- e.printStackTrace();
+ LOG.warn("Error parsing pre-analyzed field '" + field.getName() + "'", e);
return null;
}
return f;
@@ -128,6 +133,36 @@ public class PreAnalyzedField extends Fi
}
/**
+ * Utility method to create a {@link org.apache.lucene.document.FieldType}
+ * based on the {@link SchemaField}
+ */
+ public static org.apache.lucene.document.FieldType createFieldType(SchemaField field) {
+ if (!field.indexed() && !field.stored()) {
+ if (log.isTraceEnabled())
+ log.trace("Ignoring unindexed/unstored field: " + field);
+ return null;
+ }
+ org.apache.lucene.document.FieldType newType = new org.apache.lucene.document.FieldType();
+ newType.setIndexed(field.indexed());
+ newType.setTokenized(field.isTokenized());
+ newType.setStored(field.stored());
+ newType.setOmitNorms(field.omitNorms());
+ IndexOptions options = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+ if (field.omitTermFreqAndPositions()) {
+ options = IndexOptions.DOCS_ONLY;
+ } else if (field.omitPositions()) {
+ options = IndexOptions.DOCS_AND_FREQS;
+ } else if (field.storeOffsetsWithPositions()) {
+ options = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+ }
+ newType.setIndexOptions(options);
+ newType.setStoreTermVectors(field.storeTermVector());
+ newType.setStoreTermVectorOffsets(field.storeTermOffsets());
+ newType.setStoreTermVectorPositions(field.storeTermPositions());
+ return newType;
+ }
+
+ /**
* This is a simple holder of a stored part and the collected states (tokens with attributes).
*/
public static class ParseResult {
@@ -165,19 +200,44 @@ public class PreAnalyzedField extends Fi
}
PreAnalyzedTokenizer parse = new PreAnalyzedTokenizer(new StringReader(val), parser);
parse.reset(); // consume
- Field f = (Field)super.createField(field, val, boost);
+ org.apache.lucene.document.FieldType type = createFieldType(field);
+ if (type == null) {
+ parse.close();
+ return null;
+ }
+ Field f = null;
if (parse.getStringValue() != null) {
- f.setStringValue(parse.getStringValue());
+ if (field.stored()) {
+ f = new Field(field.getName(), parse.getStringValue(), type);
+ } else {
+ type.setStored(false);
+ }
} else if (parse.getBinaryValue() != null) {
- f.setBytesValue(parse.getBinaryValue());
+ if (field.isBinary()) {
+ f = new Field(field.getName(), parse.getBinaryValue(), type);
+ }
} else {
- f.fieldType().setStored(false);
+ type.setStored(false);
}
if (parse.hasTokenStream()) {
- f.fieldType().setIndexed(true);
- f.fieldType().setTokenized(true);
- f.setTokenStream(parse);
+ if (field.indexed()) {
+ type.setIndexed(true);
+ type.setTokenized(true);
+ if (f != null) {
+ f.setTokenStream(parse);
+ } else {
+ f = new Field(field.getName(), parse, type);
+ }
+ } else {
+ if (f != null) {
+ f.fieldType().setIndexed(false);
+ f.fieldType().setTokenized(false);
+ }
+ }
+ }
+ if (f != null) {
+ f.setBoost(boost);
}
return f;
}
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java?rev=1464902&r1=1464901&r2=1464902&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java Fri Apr 5 09:48:27 2013
@@ -177,6 +177,12 @@ public class DocumentBuilder {
private static void addField(Document doc, SchemaField field, Object val, float boost) {
+ if (val instanceof IndexableField) {
+ // set boost to the calculated compound boost
+ ((Field)val).setBoost(boost);
+ doc.add((Field)val);
+ return;
+ }
for (IndexableField f : field.getType().createFields(field, val, boost)) {
if (f != null) doc.add((Field) f); // null fields are not added
}
Copied: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorFactory.java (from r1464889, lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorFactory.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorFactory.java?p2=lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorFactory.java&p1=lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorFactory.java&r1=1464889&r2=1464902&rev=1464902&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorFactory.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorFactory.java Fri Apr 5 09:48:27 2013
@@ -1,15 +1,12 @@
package org.apache.solr.update.processor;
import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.List;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
-import org.apache.lucene.index.StorableField;
+import org.apache.lucene.index.IndexableField;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
@@ -46,7 +43,7 @@ import org.apache.solr.schema.SimplePreA
* <p>Fields are specified using the same patterns as in {@link FieldMutatingUpdateProcessorFactory}.
* They are then checked whether they follow a pre-analyzed format defined by <code>parser</code>.
* Valid fields are then parsed. The original {@link SchemaField} is used for the initial
- * creation of {@link StorableField}, which is then modified to add the results from
+ * creation of {@link IndexableField}, which is then modified to add the results from
* parsing (token stream value and/or string value) and then it will be directly added to
* the final Lucene {@link Document} to be indexed.</p>
* <p>Fields that are declared in the patterns list but are not present
@@ -67,7 +64,7 @@ import org.apache.solr.schema.SimplePreA
* uses the "simple" parser ({@link SimplePreAnalyzedParser}) and one that uses
* the "json" parser ({@link JsonPreAnalyzedParser}). Field "nonexistent" will be
* removed from input documents if not present in the schema. Other fields will be
- * analyzed and if valid they will be converted to {@link StorableField}-s or if
+ * analyzed and if valid they will be converted to {@link IndexableField}-s or if
* they are not in a valid format that can be parsed with the selected parser they
* will be passed as-is. Assuming that <code>ssto</code> field is stored but not
* indexed, and <code>sind</code> field is indexed but not stored: if
Modified: lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml?rev=1464902&r1=1464901&r2=1464902&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml Fri Apr 5 09:48:27 2013
@@ -390,4 +390,30 @@
</processor>
</updateRequestProcessorChain>
+ <updateRequestProcessorChain name="pre-analyzed-simple">
+ <processor class="solr.PreAnalyzedUpdateProcessorFactory">
+ <str name="fieldName">subject</str>
+ <str name="fieldName">title</str>
+ <str name="fieldName">teststop</str>
+ <str name="fieldName">nonexistent</str>
+ <str name="fieldName">ssto</str>
+ <str name="fieldName">sind</str>
+ <str name="parser">simple</str>
+ </processor>
+ <processor class="solr.RunUpdateProcessorFactory" />
+ </updateRequestProcessorChain>
+
+ <updateRequestProcessorChain name="pre-analyzed-json">
+ <processor class="solr.PreAnalyzedUpdateProcessorFactory">
+ <str name="fieldName">subject</str>
+ <str name="fieldName">title</str>
+ <str name="fieldName">teststop</str>
+ <str name="fieldName">nonexistent</str>
+ <str name="fieldName">ssto</str>
+ <str name="fieldName">sind</str>
+ <str name="parser">json</str>
+ </processor>
+ <processor class="solr.RunUpdateProcessorFactory" />
+ </updateRequestProcessorChain>
+
</config>
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/schema/PreAnalyzedFieldTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/schema/PreAnalyzedFieldTest.java?rev=1464902&r1=1464901&r2=1464902&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/schema/PreAnalyzedFieldTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/schema/PreAnalyzedFieldTest.java Fri Apr 5 09:48:27 2013
@@ -22,11 +22,13 @@ import java.util.HashMap;
import org.apache.lucene.document.Field;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.util.Base64;
import org.apache.solr.schema.PreAnalyzedField.PreAnalyzedParser;
+import org.junit.BeforeClass;
import org.junit.Test;
-public class PreAnalyzedFieldTest extends LuceneTestCase {
+public class PreAnalyzedFieldTest extends SolrTestCaseJ4 {
private static final String[] valid = {
"1 one two three", // simple parsing
@@ -70,6 +72,11 @@ public class PreAnalyzedFieldTest extend
int props =
FieldProperties.INDEXED | FieldProperties.STORED;
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ initCore("solrconfig.xml","schema.xml");
+ }
+
@Override
public void setUp() throws Exception {
super.setUp();
@@ -82,7 +89,7 @@ public class PreAnalyzedFieldTest extend
// use Simple format
HashMap<String,String> args = new HashMap<String,String>();
args.put(PreAnalyzedField.PARSER_IMPL, SimplePreAnalyzedParser.class.getName());
- paf.init((IndexSchema)null, args);
+ paf.init(h.getCore().getSchema(), args);
PreAnalyzedParser parser = new SimplePreAnalyzedParser();
for (int i = 0; i < valid.length; i++) {
String s = valid[i];
@@ -100,7 +107,7 @@ public class PreAnalyzedFieldTest extend
@Test
public void testInvalidSimple() {
PreAnalyzedField paf = new PreAnalyzedField();
- paf.init((IndexSchema)null, Collections.<String,String>emptyMap());
+ paf.init(h.getCore().getSchema(), Collections.<String,String>emptyMap());
for (String s : invalid) {
try {
paf.fromString(field, s, 1.0f);
@@ -125,7 +132,7 @@ public class PreAnalyzedFieldTest extend
// use Simple format
HashMap<String,String> args = new HashMap<String,String>();
args.put(PreAnalyzedField.PARSER_IMPL, SimplePreAnalyzedParser.class.getName());
- paf.init((IndexSchema)null, args);
+ paf.init(h.getCore().getSchema(), args);
try {
Field f = (Field)paf.fromString(field, valid[0], 1.0f);
} catch (Exception e) {
@@ -133,7 +140,7 @@ public class PreAnalyzedFieldTest extend
}
// use JSON format
args.put(PreAnalyzedField.PARSER_IMPL, JsonPreAnalyzedParser.class.getName());
- paf.init((IndexSchema)null, args);
+ paf.init(h.getCore().getSchema(), args);
try {
Field f = (Field)paf.fromString(field, valid[0], 1.0f);
fail("Should fail JSON parsing: '" + valid[0]);