You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2012/06/14 00:57:49 UTC

svn commit: r1350050 - in /lucene/dev/trunk/solr: ./ core/src/java/org/apache/solr/update/processor/ core/src/test-files/solr/conf/ core/src/test/org/apache/solr/update/processor/

Author: hossman
Date: Wed Jun 13 22:57:48 2012
New Revision: 1350050

URL: http://svn.apache.org/viewvc?rev=1350050&view=rev
Log:
SOLR-2599: CloneFieldUpdateProcessorFactory

Added:
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/CloneFieldUpdateProcessorFactory.java   (with props)
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/IgnoreFieldUpdateProcessorFactory.java   (with props)
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/TruncateFieldUpdateProcessorFactory.java   (with props)
Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java
    lucene/dev/trunk/solr/core/src/test-files/solr/conf/solrconfig-update-processor-chains.xml
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1350050&r1=1350049&r2=1350050&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Jun 13 22:57:48 2012
@@ -252,6 +252,8 @@ New Features
       LastFieldValueUpdateProcessorFactory
       MinFieldValueUpdateProcessorFactory
       MaxFieldValueUpdateProcessorFactory
+      TruncateFieldUpdateProcessorFactory
+      IgnoreFieldUpdateProcessorFactory 
   (hossman, janhoy)
 
 * SOLR-3120: Optional post filtering for spatial queries bbox and geofilt
@@ -357,6 +359,10 @@ New Features
   in cases where testing spellcheck collations for result counts should use different
   parameters from the main query (James Dyer)
 
+* SOLR-2599: CloneFieldUpdateProcessorFactory provides similar functionality 
+  to schema.xml's <copyField/> declaration but as an update processor that can 
+  be combined with other processors in any order. (Jan Høydahl & hossman)
+
 Optimizations
 ----------------------
 

Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/CloneFieldUpdateProcessorFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/CloneFieldUpdateProcessorFactory.java?rev=1350050&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/CloneFieldUpdateProcessorFactory.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/CloneFieldUpdateProcessorFactory.java Wed Jun 13 22:57:48 2012
@@ -0,0 +1,265 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.update.processor;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.HashSet;
+
+import org.apache.solr.schema.IndexSchema;
+
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.util.plugin.SolrCoreAware;
+
+import org.apache.solr.common.util.NamedList;
+
+import org.apache.solr.common.SolrInputField;
+import org.apache.solr.common.SolrInputDocument;
+
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
+import static org.apache.solr.common.SolrException.ErrorCode.*;
+
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+
+import org.apache.solr.update.AddUpdateCommand;
+
+import org.apache.solr.update.processor.FieldMutatingUpdateProcessorFactory;
+import org.apache.solr.update.processor.FieldMutatingUpdateProcessorFactory.SelectorParams;
+import org.apache.solr.update.processor.FieldMutatingUpdateProcessor.FieldNameSelector;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Clones the values found in any matching <code>source</code> field into 
+ * the configured <code>dest</code> field.
+ * <p>
+ * While the <code>dest</code> field must be a single <code>&lt;str&gt;</code>, 
+ * the <code>source</code> fields can be configured as either:
+ * </p>
+ * <ul>
+ *  <li>One or more <code>&lt;str&gt;</code></li>
+ *  <li>An <code>&lt;arr&gt;</code> of <code>&lt;str&gt;</code></li>
+ *  <li>A <code>&lt;lst&gt;</code> containing {@link FieldMutatingUpdateProcessorFactory FieldMutatingUpdateProcessorFactory style selector arguments}</li>
+ * </ul>
+ * <p>
+ * If the <code>dest</code> field already exists in the document, then the 
+ * values from the <code>source</code> fields will be added to it.  The 
+ * "boost" value associated with the <code>dest</code> will not be changed, 
+ * and any boost specified on the <code>source</code> fields will be ignored.  
+ * (If the <code>dest</code> field did not exist prior to this processor, the 
+ * newly created <code>dest</code> field will have the default boost of 1.0)
+ * </p>
+ * <p>
+ * In the example below, the <code>category</code> field will be cloned 
+ * into the <code>category_s</code> field, both the <code>authors</code> and 
+ * <code>editors</code> fields will be cloned into the <code>contributors</code>
+ * field, and any field with a name ending in <code>_price</code> -- except for 
+ * <code>list_price</code> -- will be cloned into the <code>all_prices</code> 
+ * field. 
+ * </p>
+ * <!-- see solrconfig-update-processor-chains.xml for where this is tested -->
+ * <pre class="prettyprint">
+ *   &lt;updateRequestProcessorChain name="multiple-clones"&gt;
+ *     &lt;processor class="solr.CloneFieldUpdateProcessorFactory"&gt;
+ *       &lt;str name="source"&gt;category&lt;/str&gt;
+ *       &lt;str name="dest"&gt;category_s&lt;/str&gt;
+ *     &lt;/processor&gt;
+ *     &lt;processor class="solr.CloneFieldUpdateProcessorFactory"&gt;
+ *       &lt;arr name="source"&gt;
+ *         &lt;str&gt;authors&lt;/str&gt;
+ *         &lt;str&gt;editors&lt;/str&gt;
+ *       &lt;/arr&gt;
+ *       &lt;str name="dest"&gt;contributors&lt;/str&gt;
+ *     &lt;/processor&gt;
+ *     &lt;processor class="solr.CloneFieldUpdateProcessorFactory"&gt;
+ *       &lt;lst name="source"&gt;
+ *         &lt;str name="fieldRegex"&gt;.*_price&lt;/str&gt;
+ *         &lt;lst name="exclude"&gt;
+ *           &lt;str name="fieldName"&gt;list_price&lt;/str&gt;
+ *         &lt;/lst&gt;
+ *       &lt;/lst&gt;
+ *       &lt;str name="dest"&gt;all_prices&lt;/str&gt;
+ *     &lt;/processor&gt;
+ *   &lt;/updateRequestProcessorChain&gt;
+ * </pre>
+ */
+public class CloneFieldUpdateProcessorFactory 
+  extends UpdateRequestProcessorFactory implements SolrCoreAware {
+  
+  private final static Logger log = LoggerFactory.getLogger(CloneFieldUpdateProcessorFactory.class);
+  
+  /** Name of the init param that selects the field(s) to clone from. */
+  public static final String SOURCE_PARAM = "source";
+  /** Name of the init param that names the field to clone into. */
+  public static final String DEST_PARAM = "dest";
+  
+  // parsed by init(NamedList), consumed by inform(SolrCore)
+  private SelectorParams srcInclusions = new SelectorParams();
+  private Collection<SelectorParams> srcExclusions 
+    = new ArrayList<SelectorParams>();
+
+  // built by inform(SolrCore); null until then
+  private FieldNameSelector srcSelector = null;
+  // name of the destination field; assigned by init(NamedList)
+  private String dest = null;
+
+  /**
+   * Returns the source field selector built by <code>inform(SolrCore)</code>.
+   *
+   * @exception SolrException if the selector has not been built yet
+   */
+  protected final FieldNameSelector getSourceSelector() {
+    if (null != srcSelector) return srcSelector;
+
+    throw new SolrException(SERVER_ERROR, "selector was never initialized, "+
+                            " inform(SolrCore) never called???");
+  }
+
+  /**
+   * Parses the required <code>dest</code> and <code>source</code> init params.
+   * <p>
+   * <code>dest</code> must be a single string.  <code>source</code> is either 
+   * a single <code>lst</code> of selector options (optionally containing 
+   * <code>exclude</code> lists), or is handed to 
+   * {@link FieldMutatingUpdateProcessorFactory#oneOrMany} to be read as 
+   * field names.  Any param still left in <code>args</code> once parsing is 
+   * done is rejected.
+   * </p>
+   *
+   * @exception SolrException if a param is missing, malformed or unexpected
+   */
+  @SuppressWarnings("unchecked")
+  @Override
+  public void init(NamedList args) {
+    Object d = args.remove(DEST_PARAM);
+    if (null == d) {
+      throw new SolrException
+        (SERVER_ERROR, "Init param '" + DEST_PARAM + "' must be specified"); 
+    } else if (! (d instanceof CharSequence) ) {
+      throw new SolrException
+        (SERVER_ERROR, "Init param '" + DEST_PARAM + "' must be a string (ie: 'str')");
+    }
+    dest = d.toString();
+
+    List<Object> sources = args.getAll(SOURCE_PARAM);
+    if (0 == sources.size()) {
+      throw new SolrException
+        (SERVER_ERROR, "Init param '" + SOURCE_PARAM + "' must be specified"); 
+    } 
+    if (1 == sources.size() && sources.get(0) instanceof NamedList) {
+      // nested set of selector options
+      NamedList selectorConfig = (NamedList) args.remove(SOURCE_PARAM);
+
+      srcInclusions = parseSelectorParams(selectorConfig);
+
+      List<Object> excList = selectorConfig.getAll("exclude");
+
+      for (Object excObj : excList) {
+        if (null == excObj) {
+          throw new SolrException
+            (SERVER_ERROR, "Init param '" + SOURCE_PARAM + 
+             "' child 'exclude' can not be null"); 
+        }
+        if (! (excObj instanceof NamedList) ) {
+          throw new SolrException
+            (SERVER_ERROR, "Init param '" + SOURCE_PARAM + 
+             "' child 'exclude' must be <lst/>"); 
+        }
+        NamedList exc = (NamedList) excObj;
+        srcExclusions.add(parseSelectorParams(exc));
+        if (0 < exc.size()) {
+          // name the leftover key of this exclude block, not of its parent
+          throw new SolrException(SERVER_ERROR, "Init param '" + SOURCE_PARAM + 
+                                  "' has unexpected 'exclude' sub-param(s): '" 
+                                  + exc.getName(0) + "'");
+        }
+        // call once per instance
+        selectorConfig.remove("exclude");
+      }
+
+      if (0 < selectorConfig.size()) {
+        throw new SolrException(SERVER_ERROR, "Init param '" + SOURCE_PARAM + 
+                                "' contains unexpected child param(s): '" + 
+                                selectorConfig.getName(0) + "'");
+      }
+    } else {
+      // source better be one or more strings
+      srcInclusions.fieldName = new HashSet<String>
+        (FieldMutatingUpdateProcessorFactory.oneOrMany(args, "source"));
+    }
+
+    if (0 < args.size()) {
+      throw new SolrException(SERVER_ERROR, 
+                              "Unexpected init param(s): '" + 
+                              args.getName(0) + "'");
+    }
+
+    super.init(args);
+  }
+
+  /**
+   * Builds the source selector from the params parsed by init: the inclusion
+   * selector, wrapped once for each configured <code>exclude</code> block.
+   * The field is only assigned once the whole chain has been built.
+   */
+  public void inform(final SolrCore core) {
+    FieldNameSelector selector = buildSelector(core, srcInclusions);
+    for (SelectorParams exc : srcExclusions) {
+      selector = FieldMutatingUpdateProcessor.wrap
+        (selector, buildSelector(core, exc));
+    }
+    srcSelector = selector;
+  }
+
+  /** Creates a selector from the given params via createFieldNameSelector. */
+  private static FieldNameSelector buildSelector(final SolrCore core,
+                                                 final SelectorParams params) {
+    return FieldMutatingUpdateProcessor.createFieldNameSelector
+      (core.getResourceLoader(),
+       core.getSchema(),
+       params.fieldName,
+       params.typeName,
+       params.typeClass,
+       params.fieldRegex,
+       FieldMutatingUpdateProcessor.SELECT_NO_FIELDS);
+  }
+
+  /**
+   * Returns a processor that, for each added document, appends the values of
+   * every selected source field to the dest field and then delegates to the 
+   * superclass <code>processAdd</code>.
+   *
+   * @exception SolrException if <code>inform(SolrCore)</code> was never called
+   */
+  @Override
+  public final UpdateRequestProcessor getInstance(SolrQueryRequest req,
+                                                  SolrQueryResponse rsp,
+                                                  UpdateRequestProcessor next) {
+    // fail here with a descriptive error rather than with an NPE per document
+    final FieldNameSelector selector = getSourceSelector();
+    return new UpdateRequestProcessor(next) {
+      @Override
+      public void processAdd(AddUpdateCommand cmd) throws IOException {
+        final SolrInputDocument doc = cmd.getSolrInputDocument();
+
+        // preserve initial values and boost (if any)
+        SolrInputField destField = doc.containsKey(dest) ? 
+          doc.getField(dest) : new SolrInputField(dest); 
+        
+        boolean modified = false;
+        for (final String fname : doc.getFieldNames()) {
+          if (! selector.shouldMutate(fname)) continue;
+
+          // NOTE(review): getFieldValues presumably returns null when the 
+          // field holds no value; skip such fields instead of NPE-ing -- confirm
+          final Collection<?> vals = doc.getFieldValues(fname);
+          if (null == vals) continue;
+
+          for (Object val : vals) {
+            // preserve existing dest boost (multiplicative), ignore src boost
+            destField.addValue(val, 1.0f);
+          }
+          modified=true;
+        }
+
+        if (modified) doc.put(dest, destField);
+
+        super.processAdd(cmd);
+      }
+    };
+  }
+
+  /** macro */
+  private static SelectorParams parseSelectorParams(NamedList args) {
+    return FieldMutatingUpdateProcessorFactory.parseSelectorParams(args);
+  }
+
+}

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java?rev=1350050&r1=1350049&r2=1350050&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java Wed Jun 13 22:57:48 2012
@@ -108,7 +108,7 @@ public abstract class FieldMutatingUpdat
   extends UpdateRequestProcessorFactory 
   implements SolrCoreAware {
   
-  private static class SelectorParams {
+  public static final class SelectorParams {
     public Set<String> fieldName = Collections.emptySet();
     public Set<String> typeName = Collections.emptySet();
     public Collection<String> typeClass = Collections.emptyList();
@@ -129,7 +129,7 @@ public abstract class FieldMutatingUpdat
   }
 
   @SuppressWarnings("unchecked")
-  private static final SelectorParams parseSelectorParams(NamedList args) {
+  public static SelectorParams parseSelectorParams(NamedList args) {
     SelectorParams params = new SelectorParams();
     
     params.fieldName = new HashSet<String>(oneOrMany(args, "fieldName"));
@@ -246,7 +246,7 @@ public abstract class FieldMutatingUpdat
    * to one or more strings (or arrays of strings)
    * @exception SolrException invalid arr/str structure.
    */
-  private static Collection<String> oneOrMany(final NamedList args, final String key) {
+  public static Collection<String> oneOrMany(final NamedList args, final String key) {
     List<String> result = new ArrayList<String>(args.size() / 2);
     final String err = "init arg '" + key + "' must be a string "
       + "(ie: 'str'), or an array (ie: 'arr') containing strings; found: ";

Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/IgnoreFieldUpdateProcessorFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/IgnoreFieldUpdateProcessorFactory.java?rev=1350050&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/IgnoreFieldUpdateProcessorFactory.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/IgnoreFieldUpdateProcessorFactory.java Wed Jun 13 22:57:48 2012
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update.processor;
+
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.FieldType;
+
+import org.apache.solr.common.SolrInputField;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+
+/**
+ * Ignores &amp; removes fields matching the specified 
+ * conditions from any document being added to the index.
+ *
+ * <p>
+ * By default, this processor ignores any field name which does not 
+ * exist according to the schema.
+ * </p>
+ * 
+ * <p>
+ * For example, in the configuration below, any field name which would cause 
+ * an error because it does not exist, or match a dynamicField, in the 
+ * schema.xml would be silently removed from any added documents...
+ * </p>
+ *
+ * <pre class="prettyprint">
+ *  &lt;updateRequestProcessorChain&gt;
+ *    &lt;processor class="solr.IgnoreFieldUpdateProcessorFactory" /&gt;
+ *  &lt;/updateRequestProcessorChain&gt;
+ * </pre>
+ *
+ * <p>
+ * In this second example, any field name ending in "_raw" found in a 
+ * document being added would be removed...
+ * </p>
+ * <pre class="prettyprint">
+ *  &lt;updateRequestProcessorChain&gt;
+ *    &lt;processor class="solr.IgnoreFieldUpdateProcessorFactory"&gt;
+ *      &lt;str name="fieldRegex"&gt;.*_raw&lt;/str&gt;
+ *    &lt;/processor&gt;
+ *  &lt;/updateRequestProcessorChain&gt;
+ * </pre>
+ */
+public final class IgnoreFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {
+
+  /**
+   * Creates a processor whose <code>mutate</code> step returns 
+   * <code>null</code> for every field it is handed.
+   */
+  @Override
+  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
+                                            SolrQueryResponse rsp,
+                                            UpdateRequestProcessor next) {
+    return new FieldMutatingUpdateProcessor(getSelector(), next) {
+      protected SolrInputField mutate(final SolrInputField src) {
+        // the incoming field is never inspected
+        return null;
+      }
+    };
+  }
+
+  /**
+   * Default selector: matches exactly those field names for which the 
+   * core's schema returns no field type.
+   */
+  @Override
+  public FieldMutatingUpdateProcessor.FieldNameSelector 
+    getDefaultSelector(final SolrCore core) {
+    final IndexSchema coreSchema = core.getSchema();
+    return new FieldMutatingUpdateProcessor.FieldNameSelector() {
+      public boolean shouldMutate(final String fieldName) {
+        return null == coreSchema.getFieldTypeNoEx(fieldName);
+      }
+    };
+  }
+  
+}
+

Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/TruncateFieldUpdateProcessorFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/TruncateFieldUpdateProcessorFactory.java?rev=1350050&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/TruncateFieldUpdateProcessorFactory.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/TruncateFieldUpdateProcessorFactory.java Wed Jun 13 22:57:48 2012
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update.processor;
+
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.util.NamedList;
+
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+
+/**
+ * Truncates any CharSequence values found in fields matching the specified 
+ * conditions to a maximum character length.
+ * <p>
+ * By default this processor matches no fields
+ * </p>
+ *
+ * <p>For example, with the configuration listed below, any String value in 
+ * a field declared in the schema using <code>StrField</code> will be 
+ * truncated to no more than 100 characters.
+ * </p>
+ * <pre class="prettyprint">
+ * &lt;processor class="solr.TruncateFieldUpdateProcessorFactory"&gt;
+ *   &lt;str name="typeClass"&gt;solr.StrField&lt;/str&gt;
+ *   &lt;int name="maxLength"&gt;100&lt;/int&gt;
+ * &lt;/processor&gt;
+ * </pre>
+ */
+public final class TruncateFieldUpdateProcessorFactory 
+  extends FieldMutatingUpdateProcessorFactory {
+
+  /** Name of the required init param holding the maximum length. */
+  private static final String MAX_LENGTH_PARAM = "maxLength";
+
+  /** Maximum length kept for each CharSequence value; assigned by init. */
+  private int maxLength = 0;
+
+  /**
+   * Removes and validates the required <code>maxLength</code> init param, 
+   * then hands the remaining args to the superclass.
+   *
+   * @exception SolrException if the param is missing, is not a 
+   *            <code>Number</code>, or is negative
+   */
+  @SuppressWarnings("unchecked")
+  @Override
+  public void init(NamedList args) {
+
+    Object lengthParam = args.remove(MAX_LENGTH_PARAM);
+    if (null == lengthParam) {
+      throw new SolrException(ErrorCode.SERVER_ERROR, 
+                              "Missing required init parameter: " + 
+                              MAX_LENGTH_PARAM);
+    }
+    if ( ! (lengthParam instanceof Number) ) {
+      // note the leading space before "must" and the closing quote
+      throw new SolrException(ErrorCode.SERVER_ERROR, 
+                              "Init param " + MAX_LENGTH_PARAM + 
+                              " must be a number; found: \"" +
+                              lengthParam.toString() + "\"");
+    }
+    maxLength = ((Number)lengthParam).intValue();
+    if (maxLength < 0) {
+      throw new SolrException(ErrorCode.SERVER_ERROR, 
+                              "Init param " + MAX_LENGTH_PARAM + 
+                              " must be >= 0; found: " + maxLength);
+    }
+
+    super.init(args);
+  }
+
+  /** Default selector: {@link FieldMutatingUpdateProcessor#SELECT_NO_FIELDS}. */
+  @Override
+  public FieldMutatingUpdateProcessor.FieldNameSelector 
+    getDefaultSelector(final SolrCore core) {
+
+    return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
+  }
+  
+  /**
+   * Returns a processor that replaces any CharSequence value longer than 
+   * <code>maxLength</code> with its first <code>maxLength</code> characters; 
+   * all other values are returned unchanged.
+   */
+  @Override
+  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
+                                            SolrQueryResponse rsp,
+                                            UpdateRequestProcessor next) {
+    return new FieldValueMutatingUpdateProcessor(getSelector(), next) {
+      protected Object mutateValue(final Object src) {
+        if (src instanceof CharSequence) {
+          CharSequence s = (CharSequence)src;
+          if (maxLength < s.length()) {
+            return s.subSequence(0, maxLength);
+          }
+        }
+        // not a CharSequence, or already short enough
+        return src;
+      }
+    };
+  }
+}
+

Modified: lucene/dev/trunk/solr/core/src/test-files/solr/conf/solrconfig-update-processor-chains.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test-files/solr/conf/solrconfig-update-processor-chains.xml?rev=1350050&r1=1350049&r2=1350050&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test-files/solr/conf/solrconfig-update-processor-chains.xml (original)
+++ lucene/dev/trunk/solr/core/src/test-files/solr/conf/solrconfig-update-processor-chains.xml Wed Jun 13 22:57:48 2012
@@ -224,6 +224,126 @@
     <processor class="solr.TrimFieldUpdateProcessorFactory"/>
   </updateRequestProcessorChain>
 
+  <updateRequestProcessorChain name="truncate">
+    <processor class="solr.TruncateFieldUpdateProcessorFactory">
+      <str name="fieldName">trunc</str>
+      <int name="maxLength">5</int>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="ignore-not-in-schema">
+    <processor class="solr.IgnoreFieldUpdateProcessorFactory" />
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="ignore-some">
+    <processor class="solr.IgnoreFieldUpdateProcessorFactory">
+      <str name="fieldRegex">.*_raw</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="clone-single">
+    <processor class="solr.CloneFieldUpdateProcessorFactory">
+      <str name="source">source1_s</str>
+      <str name="dest">dest_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+  <updateRequestProcessorChain name="clone-multi">
+    <processor class="solr.CloneFieldUpdateProcessorFactory">
+      <str name="source">source1_s</str>
+      <str name="source">source2_s</str>
+      <str name="dest">dest_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="clone-array">
+    <processor class="solr.CloneFieldUpdateProcessorFactory">
+      <arr name="source">
+        <str>source1_s</str>
+        <str>source2_s</str>
+      </arr>
+      <str name="dest">dest_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="clone-selector">
+    <processor class="solr.CloneFieldUpdateProcessorFactory">
+      <lst name="source">
+        <str name="fieldRegex">source\d_.*</str>
+        <lst name="exclude">
+          <str name="fieldRegex">source0_.*</str>
+        </lst>
+      </lst>
+      <str name="dest">dest_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="clone-max-chars">
+    <processor class="solr.CloneFieldUpdateProcessorFactory">
+      <str name="source">field1</str>
+      <str name="dest">toField</str>
+    </processor>
+    <processor class="solr.TruncateFieldUpdateProcessorFactory">
+      <str name="fieldName">toField</str>
+      <int name="maxLength">3</int>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="clone-move">
+    <processor class="solr.CloneFieldUpdateProcessorFactory">
+      <str name="source">field1</str>
+      <str name="dest">toField</str>
+    </processor>
+    <processor class="solr.IgnoreFieldUpdateProcessorFactory">
+      <str name="fieldName">field1</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="clone-replace">
+    <processor class="solr.IgnoreFieldUpdateProcessorFactory">
+      <str name="fieldName">toField</str>
+    </processor>
+    <processor class="solr.CloneFieldUpdateProcessorFactory">
+      <str name="source">field1</str>
+      <str name="dest">toField</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="clone-append">
+    <processor class="solr.CloneFieldUpdateProcessorFactory">
+      <str name="source">field1</str>
+      <str name="source">field2</str>
+      <str name="dest">toField</str>
+    </processor>
+    <processor class="solr.ConcatFieldUpdateProcessorFactory">
+      <str name="delimiter">; </str>
+      <str name="fieldName">toField</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <!-- example used in CloneFieldUpdateProcessorFactory javadocs -->
+  <updateRequestProcessorChain name="multiple-clones">
+    <processor class="solr.CloneFieldUpdateProcessorFactory">
+      <str name="source">category</str>
+      <str name="dest">category_s</str>
+    </processor>
+    <processor class="solr.CloneFieldUpdateProcessorFactory">
+      <arr name="source">
+        <str>authors</str>
+        <str>editors</str>
+      </arr>
+      <str name="dest">contributors</str>
+    </processor>
+    <processor class="solr.CloneFieldUpdateProcessorFactory">
+      <lst name="source">
+        <str name="fieldRegex">.*_price</str>
+        <lst name="exclude">
+          <str name="fieldName">list_price</str>
+        </lst>
+      </lst>
+      <str name="dest">all_prices</str>
+    </processor>
+  </updateRequestProcessorChain>
+
   <updateRequestProcessorChain name="regex-replace">
     <processor class="solr.RegexReplaceProcessorFactory">
       <str name="fieldName">content</str>

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java?rev=1350050&r1=1350049&r2=1350050&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java Wed Jun 13 22:57:48 2012
@@ -34,6 +34,7 @@ import org.apache.solr.common.params.Mod
 import org.apache.solr.common.params.SolrParams;
 
 import org.apache.solr.core.SolrCore;
+import org.apache.solr.schema.IndexSchema;
 
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.request.LocalSolrQueryRequest;
@@ -86,13 +87,8 @@ public class FieldMutatingUpdateProcesso
             ,"//long[@name='first_foo_l'][.='"+count+"']"
             ,"//long[@name='min_foo_l'][.='-34']"
             );
-
-
-
   }
 
-
-
   public void testTrimAll() throws Exception {
     SolrInputDocument d = null;
 
@@ -559,6 +555,213 @@ public class FieldMutatingUpdateProcesso
    
   }
 
+  public void testTruncate() throws Exception {
+    SolrInputDocument d = null;
+    // Run a "trunc" field holding four values through the "truncate" chain and check what comes back.
+    d = processAdd("truncate", 
+                   doc(f("id", "1111"),
+                       f("trunc", "123456789", "", 42, "abcd")));
+
+    assertNotNull(d);
+    // Expected: only the 9-char String is cut (to "12345"); "", the Integer 42 and "abcd" are unchanged.
+    assertEquals(Arrays.asList("12345", "", 42, "abcd"),
+                 d.getFieldValues("trunc"));
+  }
+
+  public void testIgnore() throws Exception {
+    // Checks which fields the "ignore-not-in-schema" and "ignore-some" chains remove from a document.
+    IndexSchema schema = h.getCore().getSchema(); // guard: verify the schema assumptions the test relies on
+    assertNull("test expects 'foo_giberish' to not be a valid field, looks like schema was changed out from under us",
+               schema.getFieldTypeNoEx("foo_giberish"));
+    assertNotNull("test expects 't_raw' to be a valid field, looks like schema was changed out from under us",
+                  schema.getFieldTypeNoEx("t_raw"));
+    assertNotNull("test expects 'foo_s' to be a valid field, looks like schema was changed out from under us",
+                  schema.getFieldTypeNoEx("foo_s"));
+ 
+    SolrInputDocument d = null;
+    // "ignore-not-in-schema": foo_giberish (no field type in the schema) is expected to be dropped.
+    d = processAdd("ignore-not-in-schema",       
+                   doc(f("id", "1111"),
+                       f("foo_giberish", "123456789", "", 42, "abcd"),
+                       f("t_raw", "123456789", "", 42, "abcd"),
+                       f("foo_s", "hoss")));
+    
+    assertNotNull(d);
+    assertFalse(d.containsKey("foo_giberish"));
+    assertEquals(Arrays.asList("123456789", "", 42, "abcd"), 
+                 d.getFieldValues("t_raw"));
+    assertEquals("hoss", d.getFieldValue("foo_s"));
+    // "ignore-some": same input, but here foo_giberish is expected to be kept and t_raw to be dropped.
+    d = processAdd("ignore-some",
+                   doc(f("id", "1111"),
+                       f("foo_giberish", "123456789", "", 42, "abcd"),
+                       f("t_raw", "123456789", "", 42, "abcd"),
+                       f("foo_s", "hoss")));
+
+    assertNotNull(d);
+    assertEquals(Arrays.asList("123456789", "", 42, "abcd"), 
+                 d.getFieldValues("foo_giberish"));
+    assertFalse(d.containsKey("t_raw"));
+    assertEquals("hoss", d.getFieldValue("foo_s"));
+    
+
+  }
+
+  public void testCloneField() throws Exception {
+    // Checks that each clone chain copies the source field values into dest_s and leaves the sources intact.
+    SolrInputDocument d = null;
+
+    // regardless of chain, all of these should be equivalent
+    for (String chain : Arrays.asList("clone-single", "clone-multi", 
+                                      "clone-array","clone-selector" )) {
+
+      // simple clone: source1_s values are expected in dest_s, in the same order
+      d = processAdd(chain,       
+                     doc(f("id", "1111"),
+                         f("source0_s", "NOT COPIED"),
+                         f("source1_s", "123456789", "", 42, "abcd")));
+      assertNotNull(chain, d);
+      assertEquals(chain,
+                   Arrays.asList("123456789", "", 42, "abcd"), 
+                   d.getFieldValues("source1_s"));
+      assertEquals(chain,
+                   Arrays.asList("123456789", "", 42, "abcd"), 
+                   d.getFieldValues("dest_s"));
+
+      // append to existing values, preserve boost
+      d = processAdd(chain,       
+                     doc(f("id", "1111"),
+                         field("dest_s", 2.3f, "orig1", "orig2"),
+                         f("source0_s", "NOT COPIED"),
+                         f("source1_s", "123456789", "", 42, "abcd")));
+      assertNotNull(chain, d);
+      assertEquals(chain,
+                   Arrays.asList("123456789", "", 42, "abcd"), 
+                   d.getFieldValues("source1_s"));
+      assertEquals(chain,
+                   Arrays.asList("orig1", "orig2", "123456789", "", 42, "abcd"),
+                   d.getFieldValues("dest_s"));
+      assertEquals(chain + ": dest boost changed", 
+                   2.3f, d.getField("dest_s").getBoost(), 0.0f);
+    }
+
+    // should be equivalent for any chain matching source1_s and source2_s
+    for (String chain : Arrays.asList("clone-multi",
+                                      "clone-array","clone-selector" )) {
+
+      // simple clone: dest_s is expected to hold the source1_s values followed by the source2_s values
+      d = processAdd(chain,       
+                     doc(f("id", "1111"),
+                         f("source0_s", "NOT COPIED"),
+                         f("source1_s", "123456789", "", 42, "abcd"),
+                         f("source2_s", "xxx", 999)));
+      assertNotNull(chain, d);
+      assertEquals(chain,
+                   Arrays.asList("123456789", "", 42, "abcd"), 
+                   d.getFieldValues("source1_s"));
+      assertEquals(chain,
+                   Arrays.asList("xxx", 999),
+                   d.getFieldValues("source2_s"));
+      assertEquals(chain,
+                   Arrays.asList("123456789", "", 42, "abcd", "xxx", 999), 
+                   d.getFieldValues("dest_s"));
+
+      // append to existing values, preserve boost
+      d = processAdd(chain,       
+                     doc(f("id", "1111"),
+                         field("dest_s", 2.3f, "orig1", "orig2"),
+                         f("source0_s", "NOT COPIED"),
+                         f("source1_s", "123456789", "", 42, "abcd"),
+                         f("source2_s", "xxx", 999)));
+      assertNotNull(chain, d);
+      assertEquals(chain,
+                   Arrays.asList("123456789", "", 42, "abcd"), 
+                   d.getFieldValues("source1_s"));
+      assertEquals(chain,
+                   Arrays.asList("xxx", 999),
+                   d.getFieldValues("source2_s"));
+      assertEquals(chain,
+                   Arrays.asList("orig1", "orig2", 
+                                 "123456789", "", 42, "abcd",
+                                 "xxx", 999),
+                   d.getFieldValues("dest_s"));
+      assertEquals(chain + ": dest boost changed", 
+                   2.3f, d.getField("dest_s").getBoost(), 0.0f);
+    }
+  }
+
+  public void testCloneFieldExample() throws Exception {
+    // Runs the "multiple-clones" chain and checks every expected dest field; sources must stay unchanged.
+    SolrInputDocument d = null;
+
+    // test example from the javadocs
+    d = processAdd("multiple-clones",       
+                   doc(f("id", "1111"),
+                       f("category", "misc"),
+                       f("authors", "Isaac Asimov", "John Brunner"),
+                       f("editors", "John W. Campbell"),
+                       f("store1_price", 87),
+                       f("store2_price", 78),
+                       f("list_price", 1000)));
+    assertNotNull(d);
+    assertEquals("misc",d.getFieldValue("category"));
+    assertEquals("misc",d.getFieldValue("category_s"));
+    assertEquals(Arrays.asList("Isaac Asimov", "John Brunner"),
+                 d.getFieldValues("authors"));
+    assertEquals(Arrays.asList("John W. Campbell"),
+                 d.getFieldValues("editors"));
+    assertEquals(Arrays.asList("Isaac Asimov", "John Brunner", 
+                               "John W. Campbell"),
+                 d.getFieldValues("contributors")); // authors first, then editors
+    assertEquals(87,d.getFieldValue("store1_price"));
+    assertEquals(78,d.getFieldValue("store2_price"));
+    assertEquals(1000,d.getFieldValue("list_price"));
+    assertEquals(Arrays.asList(87, 78),
+                 d.getFieldValues("all_prices")); // list_price (1000) is expected to be left out
+
+  } 
+
+  public void testCloneCombinations() throws Exception {
+    // Checks the resulting document for the clone-max-chars, clone-move, clone-replace and clone-append chains.
+    SolrInputDocument d = null;
+
+    // maxChars: field1 keeps "text", while toField is expected to hold only the first 3 chars
+    d = processAdd("clone-max-chars",
+                   doc(f("id", "1111"),
+                       f("field1", "text")));
+    assertNotNull(d);
+    assertEquals("text",d.getFieldValue("field1"));
+    assertEquals("tex",d.getFieldValue("toField"));
+
+    // move: the value is expected in toField and field1 must no longer be present
+    d = processAdd("clone-move",
+                   doc(f("id", "1111"),
+                       f("field1", "text")));
+    assertNotNull(d);
+    assertEquals("text",d.getFieldValue("toField"));
+    assertFalse(d.containsKey("field1"));
+
+    // replace: the pre-existing toField value ("IGNORED") is expected to be overwritten by field1's value
+    d = processAdd("clone-replace",
+                   doc(f("id", "1111"),
+                       f("toField", "IGNORED"),
+                       f("field1", "text")));
+    assertNotNull(d);
+    assertEquals("text", d.getFieldValue("field1"));
+    assertEquals("text", d.getFieldValue("toField"));
+
+    // append: toField is expected to become one "; "-joined String of its old value, field1 and field2
+    d = processAdd("clone-append",
+                   doc(f("id", "1111"),
+                       f("toField", "aaa"),
+                       f("field1", "bbb"),
+                       f("field2", "ccc")));
+    assertNotNull(d);
+    assertEquals("bbb", d.getFieldValue("field1"));
+    assertEquals("ccc", d.getFieldValue("field2"));
+    assertEquals("aaa; bbb; ccc", d.getFieldValue("toField"));
+  } 
+
   public void testConcatDefaults() throws Exception {
     SolrInputDocument d = null;
     d = processAdd("concat-defaults",