Posted to commits@lucene.apache.org by ab...@apache.org on 2013/04/05 10:46:13 UTC

svn commit: r1464889 - in /lucene/dev/trunk/solr: ./ core/src/java/org/apache/solr/schema/ core/src/java/org/apache/solr/update/ core/src/java/org/apache/solr/update/processor/ core/src/test-files/solr/collection1/conf/ core/src/test/org/apache/solr/sc...

Author: ab
Date: Fri Apr  5 08:46:12 2013
New Revision: 1464889

URL: http://svn.apache.org/r1464889
Log:
SOLR-4648 Add PreAnalyzedUpdateProcessorFactory.

Added:
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorFactory.java   (with props)
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorTest.java   (with props)
Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
    lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/schema/PreAnalyzedFieldTest.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1464889&r1=1464888&r2=1464889&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Fri Apr  5 08:46:12 2013
@@ -80,6 +80,10 @@ Detailed Change List
 New Features
 ----------------------
 
+* SOLR-4648 PreAnalyzedUpdateProcessorFactory allows using the functionality
+  of PreAnalyzedField with other field types. See javadoc for details and
+  examples. (Andrzej Bialecki)
+
 * SOLR-4196 (and others). Solr.xml is being deprecated in favor of a simple
   properties file. In the absence of a <solr_home>/solr.xml but the presence of
   <solr_home>/solr.properties, two things will happen

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java?rev=1464889&r1=1464888&r2=1464889&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java Fri Apr  5 08:46:12 2013
@@ -28,9 +28,8 @@ import java.util.Map;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.index.GeneralField;
-import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.StorableField;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.AttributeSource.State;
@@ -57,23 +56,27 @@ public class PreAnalyzedField extends Fi
   private PreAnalyzedParser parser;
   
   @Override
-  protected void init(IndexSchema schema, Map<String, String> args) {
+  public void init(IndexSchema schema, Map<String, String> args) {
     super.init(schema, args);
     String implName = args.get(PARSER_IMPL);
     if (implName == null) {
       parser = new JsonPreAnalyzedParser();
     } else {
-      try {
-        Class<?> implClazz = Class.forName(implName);
-        if (!PreAnalyzedParser.class.isAssignableFrom(implClazz)) {
-          throw new Exception("must implement " + PreAnalyzedParser.class.getName());
-        }
-        Constructor<?> c = implClazz.getConstructor(new Class<?>[0]);
-        parser = (PreAnalyzedParser) c.newInstance(new Object[0]);
-      } catch (Exception e) {
-        LOG.warn("Can't use the configured PreAnalyzedParser class '" + implName + "' (" +
-            e.getMessage() + "), using default " + DEFAULT_IMPL);
+      // short name
+      if ("json".equalsIgnoreCase(implName)) {
         parser = new JsonPreAnalyzedParser();
+      } else if ("simple".equalsIgnoreCase(implName)) {
+        parser = new SimplePreAnalyzedParser();
+      } else {
+        try {
+          Class<? extends PreAnalyzedParser> implClazz = schema.getResourceLoader().findClass(implName, PreAnalyzedParser.class);
+          Constructor<?> c = implClazz.getConstructor(new Class<?>[0]);
+          parser = (PreAnalyzedParser) c.newInstance(new Object[0]);
+        } catch (Exception e) {
+          LOG.warn("Can't use the configured PreAnalyzedParser class '" + implName +
+              "', using default " + DEFAULT_IMPL, e);
+          parser = new JsonPreAnalyzedParser();
+        }
       }
     }
   }
@@ -102,7 +105,7 @@ public class PreAnalyzedField extends Fi
     try {
       f = fromString(field, String.valueOf(value), boost);
     } catch (Exception e) {
-      e.printStackTrace();
+      LOG.warn("Error parsing pre-analyzed field '" + field.getName() + "'", e);
       return null;
     }
     return f;
@@ -130,6 +133,36 @@ public class PreAnalyzedField extends Fi
   }
   
   /**
+   * Utility method to create a {@link org.apache.lucene.document.FieldType}
+   * based on the {@link SchemaField}
+   */
+  public static org.apache.lucene.document.FieldType createFieldType(SchemaField field) {
+    if (!field.indexed() && !field.stored()) {
+      if (log.isTraceEnabled())
+        log.trace("Ignoring unindexed/unstored field: " + field);
+      return null;
+    }
+    org.apache.lucene.document.FieldType newType = new org.apache.lucene.document.FieldType();
+    newType.setIndexed(field.indexed());
+    newType.setTokenized(field.isTokenized());
+    newType.setStored(field.stored());
+    newType.setOmitNorms(field.omitNorms());
+    IndexOptions options = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+    if (field.omitTermFreqAndPositions()) {
+      options = IndexOptions.DOCS_ONLY;
+    } else if (field.omitPositions()) {
+      options = IndexOptions.DOCS_AND_FREQS;
+    } else if (field.storeOffsetsWithPositions()) {
+      options = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+    }
+    newType.setIndexOptions(options);
+    newType.setStoreTermVectors(field.storeTermVector());
+    newType.setStoreTermVectorOffsets(field.storeTermOffsets());
+    newType.setStoreTermVectorPositions(field.storeTermPositions());
+    return newType;
+  }
+  
+  /**
    * This is a simple holder of a stored part and the collected states (tokens with attributes).
    */
   public static class ParseResult {
@@ -167,19 +200,44 @@ public class PreAnalyzedField extends Fi
     }
     PreAnalyzedTokenizer parse = new PreAnalyzedTokenizer(new StringReader(val), parser);
     parse.reset(); // consume
-    Field f = (Field)super.createField(field, val, boost);
+    org.apache.lucene.document.FieldType type = createFieldType(field);
+    if (type == null) {
+      parse.close();
+      return null;
+    }
+    Field f = null;
     if (parse.getStringValue() != null) {
-      f.setStringValue(parse.getStringValue());
+      if (field.stored()) {
+        f = new Field(field.getName(), parse.getStringValue(), type);
+      } else {
+        type.setStored(false);
+      }
     } else if (parse.getBinaryValue() != null) {
-      f.setBytesValue(parse.getBinaryValue());
+      if (field.isBinary()) {
+        f = new Field(field.getName(), parse.getBinaryValue(), type);
+      }
     } else {
-      f.fieldType().setStored(false);
+      type.setStored(false);
     }
     
     if (parse.hasTokenStream()) {
-      f.fieldType().setIndexed(true);
-      f.fieldType().setTokenized(true);
-      f.setTokenStream(parse);
+      if (field.indexed()) {
+        type.setIndexed(true);
+        type.setTokenized(true);
+        if (f != null) {
+          f.setTokenStream(parse);
+        } else {
+          f = new Field(field.getName(), parse, type);
+        }
+      } else {
+        if (f != null) {
+          f.fieldType().setIndexed(false);
+          f.fieldType().setTokenized(false);
+        }
+      }
+    }
+    if (f != null) {
+      f.setBoost(boost);
     }
     return f;
   }
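
The init() change above also means parserImpl now accepts the short names "json" and "simple" in addition to a fully qualified class name, which is resolved through the schema's resource loader. A minimal sketch of programmatic use (not part of this commit), assuming "schema" is an IndexSchema from a live core and "schemaField" is a SchemaField of this type, with exception handling omitted:

    PreAnalyzedField paf = new PreAnalyzedField();
    Map<String,String> args = new HashMap<String,String>();
    // "simple" and "json" resolve to the built-in parsers; any other value is
    // treated as a class name and loaded via schema.getResourceLoader()
    args.put(PreAnalyzedField.PARSER_IMPL, "simple");
    paf.init(schema, args);   // init() is now public and requires the schema
    // parse a value in the "simple" format: version, =stored part=, tokens
    Field f = (Field) paf.fromString(schemaField, "1 =stored text=token1 token2", 1.0f);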

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java?rev=1464889&r1=1464888&r2=1464889&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java Fri Apr  5 08:46:12 2013
@@ -175,6 +175,12 @@ public class DocumentBuilder {
 
 
   private static void addField(Document doc, SchemaField field, Object val, float boost) {
+    if (val instanceof StorableField) {
+      // set boost to the calculated compound boost
+      ((Field)val).setBoost(boost);
+      doc.add((Field)val);
+      return;
+    }
     for (StorableField f : field.getType().createFields(field, val, boost)) {
       if (f != null) doc.add((Field) f); // null fields are not added
     }

Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorFactory.java?rev=1464889&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorFactory.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorFactory.java Fri Apr  5 08:46:12 2013
@@ -0,0 +1,172 @@
+package org.apache.solr.update.processor;
+
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.StorableField;
+import org.apache.solr.common.SolrInputField;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.JsonPreAnalyzedParser;
+import org.apache.solr.schema.PreAnalyzedField;
+import org.apache.solr.schema.PreAnalyzedField.PreAnalyzedParser;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.SimplePreAnalyzedParser;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <p>An update processor that parses configured fields of any document being added
+ * using {@link PreAnalyzedField} with the configured format parser.</p>
+ * 
+ * <p>Fields are specified using the same patterns as in {@link FieldMutatingUpdateProcessorFactory}.
+ * They are then checked to see whether their values follow the pre-analyzed format
+ * defined by <code>parser</code>; valid values are parsed. The original {@link SchemaField}
+ * is used for the initial creation of a {@link StorableField}, which is then modified to add
+ * the results from parsing (token stream value and/or string value) and is added directly to
+ * the final Lucene {@link Document} to be indexed.</p>
+ * <p>Fields that are declared in the patterns list but are not present
+ * in the current schema will be removed from the input document.</p>
+ * <h3>Implementation details</h3>
+ * <p>This update processor uses {@link PreAnalyzedParser}
+ * to parse the original field content (interpreted as a string value), and thus
+ * obtain the stored part and the token stream part. Then it creates the "template"
+ * {@link Field}-s using the original {@link SchemaField#createFields(Object, float)}
+ * as declared in the current schema. Finally it sets the pre-analyzed parts if
+ * available (string value and the token
+ * stream value) on the first field of these "template" fields. If the declared
+ * field type does not support stored or indexed parts then such parts are silently
+ * discarded. Finally the updated "template" {@link Field}-s are added to the resulting
+ * {@link SolrInputField}, and the original value of that field is removed.</p>
+ * <h3>Example configuration</h3>
+ * <p>In the example configuration below there are two update chains, one that
+ * uses the "simple" parser ({@link SimplePreAnalyzedParser}) and one that uses
+ * the "json" parser ({@link JsonPreAnalyzedParser}). Field "nonexistent" will be
+ * removed from input documents if not present in the schema. Other fields will be
+ * analyzed and if valid they will be converted to {@link StorableField}-s or if
+ * they are not in a valid format that can be parsed with the selected parser they
+ * will be passed as-is. Assuming that <code>ssto</code> field is stored but not
+ * indexed, and <code>sind</code> field is indexed but not stored: if
+ * <code>ssto</code> input value contains the indexed part then this part will
+ * be discarded and only the stored value part will be retained. Similarly,
+ * if <code>sind</code> input value contains the stored part then it
+ * will be discarded and only the token stream part will be retained.</p>
+ * 
+ *  <pre class="prettyprint">
+ *   &lt;updateRequestProcessorChain name="pre-analyzed-simple"&gt;
+ *    &lt;processor class="solr.PreAnalyzedUpdateProcessorFactory"&gt;
+ *      &lt;str name="fieldName"&gt;title&lt;/str&gt;
+ *      &lt;str name="fieldName"&gt;nonexistent&lt;/str&gt;
+ *      &lt;str name="fieldName"&gt;ssto&lt;/str&gt;
+ *      &lt;str name="fieldName"&gt;sind&lt;/str&gt;
+ *      &lt;str name="parser"&gt;simple&lt;/str&gt;
+ *    &lt;/processor&gt;
+ *    &lt;processor class="solr.RunUpdateProcessorFactory" /&gt;
+ *  &lt;/updateRequestProcessorChain&gt;
+ *
+ *  &lt;updateRequestProcessorChain name="pre-analyzed-json"&gt;
+ *    &lt;processor class="solr.PreAnalyzedUpdateProcessorFactory"&gt;
+ *      &lt;str name="fieldName"&gt;title&lt;/str&gt;
+ *      &lt;str name="fieldName"&gt;nonexistent&lt;/str&gt;
+ *      &lt;str name="fieldName"&gt;ssto&lt;/str&gt;
+ *      &lt;str name="fieldName"&gt;sind&lt;/str&gt;
+ *      &lt;str name="parser"&gt;json&lt;/str&gt;
+ *    &lt;/processor&gt;
+ *    &lt;processor class="solr.RunUpdateProcessorFactory" /&gt;
+ *  &lt;/updateRequestProcessorChain&gt;
+ *  </pre>
+ *
+ */
+public class PreAnalyzedUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {
+  
+  private PreAnalyzedField parser;
+  private String parserImpl;
+
+  @Override
+  public void init(final NamedList args) {
+    parserImpl = (String)args.get("parser");
+    args.remove("parser");
+    // initialize inclusion / exclusion patterns
+    super.init(args);
+  }
+  
+  @Override
+  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
+      SolrQueryResponse rsp, UpdateRequestProcessor next) {
+    return new PreAnalyzedUpdateProcessor(getSelector(), next, req.getSchema(), parser);
+  }
+
+  @Override
+  public void inform(SolrCore core) {
+    super.inform(core);
+    parser = new PreAnalyzedField();
+    Map<String,String> args = new HashMap<String,String>();
+    if (parserImpl != null) {
+      args.put(PreAnalyzedField.PARSER_IMPL, parserImpl);
+    }
+    parser.init(core.getSchema(), args);
+  }  
+}
+
+class PreAnalyzedUpdateProcessor extends FieldMutatingUpdateProcessor {
+  
+  private PreAnalyzedField parser;
+  private IndexSchema schema;
+
+  public PreAnalyzedUpdateProcessor(FieldNameSelector sel, UpdateRequestProcessor next, IndexSchema schema, PreAnalyzedField parser) {
+    super(sel, next);
+    this.schema = schema;
+    this.parser = parser;
+  }
+
+  @Override
+  protected SolrInputField mutate(SolrInputField src) {
+    SchemaField sf = schema.getFieldOrNull(src.getName());
+    if (sf == null) { // remove this field
+      return null;
+    }
+    FieldType type = PreAnalyzedField.createFieldType(sf);
+    if (type == null) { // neither indexed nor stored - skip
+      return null;
+    }
+    SolrInputField res = new SolrInputField(src.getName());
+    res.setBoost(src.getBoost());
+    for (Object o : src) {
+      if (o == null) {
+        continue;
+      }
+      Field pre = (Field)parser.createField(sf, o, 1.0f);
+      if (pre != null) {
+        res.addValue(pre, 1.0f);
+      } else { // restore the original value
+        log.warn("Could not parse field {} - using original value as is: {}", src.getName(), o);
+        res.addValue(o, 1.0f);
+      }
+    }
+    return res;
+  }  
+}
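
For completeness, a client-side sketch (not part of this commit) of how a document carrying a pre-analyzed value might be routed through one of the chains from the javadoc above. The URL, core name, and field name are illustrative; SolrJ's UpdateRequest is used to select the chain via the update.chain parameter:

    SolrServer server = new HttpSolrServer("http://localhost:8983/solr/collection1");
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField("id", "1");
    // JSON pre-analyzed payload: a stored string part plus a two-token stream
    doc.addField("title",
        "{\"v\":\"1\",\"str\":\"string value\",\"tokens\":[{\"t\":\"foo\"},{\"t\":\"bar\"}]}");
    UpdateRequest req = new UpdateRequest();
    req.setParam("update.chain", "pre-analyzed-json");  // pick the configured chain
    req.add(doc);
    req.process(server);
    server.commit();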

Modified: lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml?rev=1464889&r1=1464888&r2=1464889&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml (original)
+++ lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml Fri Apr  5 08:46:12 2013
@@ -390,4 +390,30 @@
     </processor>
   </updateRequestProcessorChain>
 
+  <updateRequestProcessorChain name="pre-analyzed-simple">
+    <processor class="solr.PreAnalyzedUpdateProcessorFactory">
+      <str name="fieldName">subject</str>
+      <str name="fieldName">title</str>
+      <str name="fieldName">teststop</str>
+      <str name="fieldName">nonexistent</str>
+      <str name="fieldName">ssto</str>
+      <str name="fieldName">sind</str>
+      <str name="parser">simple</str>
+    </processor>
+    <processor class="solr.RunUpdateProcessorFactory" />
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="pre-analyzed-json">
+    <processor class="solr.PreAnalyzedUpdateProcessorFactory">
+      <str name="fieldName">subject</str>
+      <str name="fieldName">title</str>
+      <str name="fieldName">teststop</str>
+      <str name="fieldName">nonexistent</str>
+      <str name="fieldName">ssto</str>
+      <str name="fieldName">sind</str>
+      <str name="parser">json</str>
+    </processor>
+    <processor class="solr.RunUpdateProcessorFactory" />
+  </updateRequestProcessorChain>
+
 </config>
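
For reference, the two parsers selected above expect differently formatted field values; the new PreAnalyzedUpdateProcessorTest below feeds both, roughly:

    simple: 1 =this is a test.=one two three
            (format version, optional stored part between '=' delimiters, whitespace-separated tokens)
    json:   {"v":"1","str":"this is a test.","tokens":[{"t":"one"},{"t":"two"},{"t":"three"}]}
            (format version, optional stored string, array of tokens with attributes)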

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/schema/PreAnalyzedFieldTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/schema/PreAnalyzedFieldTest.java?rev=1464889&r1=1464888&r2=1464889&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/schema/PreAnalyzedFieldTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/schema/PreAnalyzedFieldTest.java Fri Apr  5 08:46:12 2013
@@ -22,11 +22,13 @@ import java.util.HashMap;
 
 import org.apache.lucene.document.Field;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.util.Base64;
 import org.apache.solr.schema.PreAnalyzedField.PreAnalyzedParser;
+import org.junit.BeforeClass;
 import org.junit.Test;
 
-public class PreAnalyzedFieldTest extends LuceneTestCase {
+public class PreAnalyzedFieldTest extends SolrTestCaseJ4 {
   
   private static final String[] valid = {
     "1 one two three",                       // simple parsing
@@ -70,6 +72,11 @@ public class PreAnalyzedFieldTest extend
   int props = 
     FieldProperties.INDEXED | FieldProperties.STORED;
   
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig.xml","schema.xml");
+  }
+
   @Override
   public void setUp() throws Exception {
     super.setUp();
@@ -82,7 +89,7 @@ public class PreAnalyzedFieldTest extend
     // use Simple format
     HashMap<String,String> args = new HashMap<String,String>();
     args.put(PreAnalyzedField.PARSER_IMPL, SimplePreAnalyzedParser.class.getName());
-    paf.init((IndexSchema)null, args);
+    paf.init(h.getCore().getSchema(), args);
     PreAnalyzedParser parser = new SimplePreAnalyzedParser();
     for (int i = 0; i < valid.length; i++) {
       String s = valid[i];
@@ -100,7 +107,7 @@ public class PreAnalyzedFieldTest extend
   @Test
   public void testInvalidSimple() {
     PreAnalyzedField paf = new PreAnalyzedField();
-    paf.init((IndexSchema)null, Collections.<String,String>emptyMap());
+    paf.init(h.getCore().getSchema(), Collections.<String,String>emptyMap());
     for (String s : invalid) {
       try {
         paf.fromString(field, s, 1.0f);
@@ -125,7 +132,7 @@ public class PreAnalyzedFieldTest extend
     // use Simple format
     HashMap<String,String> args = new HashMap<String,String>();
     args.put(PreAnalyzedField.PARSER_IMPL, SimplePreAnalyzedParser.class.getName());
-    paf.init((IndexSchema)null, args);
+    paf.init(h.getCore().getSchema(), args);
     try {
       Field f = (Field)paf.fromString(field, valid[0], 1.0f);
     } catch (Exception e) {
@@ -133,7 +140,7 @@ public class PreAnalyzedFieldTest extend
     }
     // use JSON format
     args.put(PreAnalyzedField.PARSER_IMPL, JsonPreAnalyzedParser.class.getName());
-    paf.init((IndexSchema)null, args);
+    paf.init(h.getCore().getSchema(), args);
     try {
       Field f = (Field)paf.fromString(field, valid[0], 1.0f);
       fail("Should fail JSON parsing: '" + valid[0]);

Added: lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorTest.java?rev=1464889&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorTest.java (added)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/PreAnalyzedUpdateProcessorTest.java Fri Apr  5 08:46:12 2013
@@ -0,0 +1,120 @@
+package org.apache.solr.update.processor;
+
+import org.apache.lucene.document.Field;
+import org.apache.solr.common.SolrInputDocument;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class PreAnalyzedUpdateProcessorTest extends UpdateProcessorTestBase {
+  String[] simpleTitle = new String[] {
+      "not pre-analyzed",
+      "1 =string value=foo bar"
+  };
+  String[] jsonTitle = new String[] {
+    "not pre-analyzed",
+    "{\"v\":\"1\",\"str\":\"string value\",\"tokens\":[{\"t\":\"foo\"},{\"t\":\"bar\"}]}",
+  };
+  String[] simpleTeststop = new String[] {
+      "1 =this is a test.=one two three",
+      "1 =this is a test.=three four five"
+  };
+  String[] jsonTeststop = new String[] {
+      "{\"v\":\"1\",\"str\":\"this is a test.\",\"tokens\":[{\"t\":\"one\"},{\"t\":\"two\"},{\"t\":\"three\"}]}",
+      "{\"v\":\"1\",\"str\":\"this is a test.\",\"tokens\":[{\"t\":\"three\"},{\"t\":\"four\"},{\"t\":\"five\"}]}",
+  };
+  
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig-update-processor-chains.xml", "schema12.xml");
+  }
+
+  @Test
+  public void testSimple() throws Exception {
+    test("pre-analyzed-simple", simpleTitle, simpleTeststop);
+  }
+  
+  @Test
+  public void testJson() throws Exception {
+    test("pre-analyzed-json", jsonTitle, jsonTeststop);
+  }
+
+  private void test(String chain, String[] title, String[] teststop) throws Exception {
+    SolrInputDocument doc = processAdd(chain,
+        doc(f("id", "1"),
+            f("title", title[0]),
+            f("teststop", teststop[0]),
+            f("nonexistent", "foobar"),
+            f("ssto", teststop[0]),
+            f("sind", teststop[0])));
+    assertEquals("title should be unchanged", title[0], doc.getFieldValue("title"));
+    assertTrue("teststop should be a Field", doc.getFieldValue("teststop") instanceof Field);
+    Field f = (Field)doc.getFieldValue("teststop");
+    assertEquals("teststop should have stringValue", "this is a test.", f.stringValue());
+    assertNotNull("teststop should have tokensStreamValue", f.tokenStreamValue());
+    assertNull("nonexistent should be dropped", doc.getField("nonexistent"));
+    // check how SchemaField type affects stored/indexed part processing
+    f = (Field)doc.getFieldValue("ssto");
+    assertNotNull("should have ssto", f);
+    assertNotNull("should have stringValue", f.stringValue());
+    assertNull("should not have tokenStreamValue", f.tokenStreamValue());
+    f = (Field)doc.getFieldValue("sind");
+    assertNotNull("should have sind", f);
+    assertNull("should not have stringValue: '" + f.stringValue() + "'", f.stringValue());
+    assertNotNull("should have tokenStreamValue", f.tokenStreamValue());
+    
+    doc = processAdd(chain,
+        doc(f("id", "2"),
+            f("title", title[1]),
+            f("teststop", teststop[1]),
+            f("nonexistent", "foobar"),
+            f("ssto", teststop[1]),
+            f("sind", teststop[1])));
+    assertTrue("title should be a Field", doc.getFieldValue("title") instanceof Field);
+    assertTrue("teststop should be a Field", doc.getFieldValue("teststop") instanceof Field);
+    f = (Field)doc.getFieldValue("teststop");
+    assertEquals("teststop should have stringValue", "this is a test.", f.stringValue());
+    assertNotNull("teststop should have tokensStreamValue", f.tokenStreamValue());
+    assertNull("nonexistent should be dropped", doc.getField("nonexistent"));
+    // check how SchemaField type affects stored/indexed part processing
+    f = (Field)doc.getFieldValue("ssto");
+    assertNotNull("should have ssto", f);
+    assertNotNull("should have stringValue", f.stringValue());
+    assertNull("should not have tokenStreamValue", f.tokenStreamValue());
+    f = (Field)doc.getFieldValue("sind");
+    assertNotNull("should have sind", f);
+    assertNull("should not have stringValue: '" + f.stringValue() + "'", f.stringValue());
+    assertNotNull("should have tokenStreamValue", f.tokenStreamValue());
+    
+    assertU(commit());
+    assertQ(req("teststop:\"one two three\"")
+        ,"//str[@name='id'][.='1']"
+        ,"//str[@name='teststop'][.='this is a test.']"
+        );
+    assertQ(req("teststop:three")
+        ,"//*[@numFound='2']"
+        ,"//result/doc[1]/str[@name='id'][.='1']"
+        ,"//result/doc[1]/str[@name='title'][.='not pre-analyzed']"
+        ,"//result/doc[2]/str[@name='id'][.='2']"
+        ,"//result/doc[2]/arr[@name='title']/str[.='string value']"
+        );
+    assertQ(req("ssto:three"), "//*[@numFound='0']");
+    assertQ(req("sind:three"), "//*[@numFound='2']");
+  }  
+}