You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2013/08/29 19:31:17 UTC

svn commit: r1518717 - in /lucene/dev/trunk/solr: ./ core/src/java/org/apache/solr/update/processor/ core/src/test-files/solr/collection1/conf/ core/src/test/org/apache/solr/update/processor/

Author: hossman
Date: Thu Aug 29 17:31:17 2013
New Revision: 1518717

URL: http://svn.apache.org/r1518717
Log:
SOLR-4249: UniqFieldsUpdateProcessorFactory now extends FieldMutatingUpdateProcessorFactory and supports all of its selector options

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java
    lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1518717&r1=1518716&r2=1518717&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Thu Aug 29 17:31:17 2013
@@ -71,6 +71,11 @@ Upgrading from Solr 4.4.0
   XXXXX and delete the ones that you do not wish to use.  See SOLR-4953 & 
   SOLR-5108 for more details.
 
+* The UniqFieldsUpdateProcessorFactory has been improved to support all of the
+  FieldMutatingUpdateProcessorFactory selector options.  The <lst name="fields"> 
+  init param option is now deprecated and should be replaced with the more standard
+  <arr name="fieldName">.  See SOLR-4249 for more details.
+
 Detailed Change List
 ----------------------
 
@@ -103,6 +108,11 @@ New Features
 * SOLR-5182: Add NoOpRegenerator, a regenerator for custom per-segment caches
   where items are preserved across commits.  (Robert Muir)
 
+* SOLR-4249: UniqFieldsUpdateProcessorFactory now extends 
+  FieldMutatingUpdateProcessorFactory and supports all of its selector options. Use
+  of the "fields" init param is now deprecated in favor of "fieldName" (hossman)
+  
+
 Bug Fixes
 ----------------------
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java?rev=1518717&r1=1518716&r2=1518717&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java Thu Aug 29 17:31:17 2013
@@ -23,6 +23,9 @@ import java.util.Collection;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
+import java.util.Map;
+
+import org.apache.solr.core.SolrCore;
 
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.util.NamedList;
@@ -30,77 +33,68 @@ import org.apache.solr.request.SolrQuery
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.update.AddUpdateCommand;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 /**
- * A non-duplicate processor. Removes duplicates in the specified fields.
- * 
- * <pre class="prettyprint" >
- * &lt;updateRequestProcessorChain name="uniq-fields"&gt;
- *   &lt;processor class="org.apache.solr.update.processor.UniqFieldsUpdateProcessorFactory"&gt;
- *     &lt;lst name="fields"&gt;
- *       &lt;str&gt;uniq&lt;/str&gt;
- *       &lt;str&gt;uniq2&lt;/str&gt;
- *       &lt;str&gt;uniq3&lt;/str&gt;
- *     &lt;/lst&gt;      
- *   &lt;/processor&gt;
- *   &lt;processor class="solr.RunUpdateProcessorFactory" /&gt;
- * &lt;/updateRequestProcessorChain&gt;</pre>
+ * Removes duplicate values found in fields matching the specified conditions.  
+ * The existing field values are iterated in order, and values are removed when 
+ * they are equal to a value that has already been seen for this field.
+ * <p>
+ * By default this processor matches no fields.
+ * </p>
  * 
+ * <p>
+ * In the example configuration below, if a document initially contains the values 
+ * <code>"Steve","Lucy","Jim","Steve","Alice","Bob","Alice"</code> in a field named 
+ * <code>foo_uniq</code> then using this processor will result in the final list of 
+ * field values being <code>"Steve","Lucy","Jim","Alice","Bob"</code>
+ * </p>
+ * <pre class="prettyprint">
+ *  &lt;processor class="solr.UniqFieldsUpdateProcessorFactory"&gt;
+ *    &lt;str name="fieldRegex"&gt;.*_uniq&lt;/str&gt;
+ *  &lt;/processor&gt;
+ * </pre> 
  */
-public class UniqFieldsUpdateProcessorFactory extends UpdateRequestProcessorFactory {
+public class UniqFieldsUpdateProcessorFactory extends FieldValueSubsetUpdateProcessorFactory {
+
+  public final static Logger log = LoggerFactory.getLogger(UniqFieldsUpdateProcessorFactory.class);
 
-  private Set<String> fields;
+  @Override
+  public FieldMutatingUpdateProcessor.FieldNameSelector 
+    getDefaultSelector(final SolrCore core) {
+    
+    return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
+  }
 
   @SuppressWarnings("unchecked")
   @Override
   public void init(@SuppressWarnings("rawtypes") NamedList args) {
-    NamedList<String> flst = (NamedList<String>)args.get("fields");
+    // legacy init param support, will be removed in 5.0
+    // no idea why this was ever implemented as <lst>; it should have just been <arr>
+    NamedList<String> flst = (NamedList<String>) args.remove("fields");
     if(flst != null){
-      fields = new HashSet<String>();
-      for(int i = 0; i < flst.size(); i++){
-        fields.add(flst.getVal(i));
+      log.warn("Use of the 'fields' init param in UniqFieldsUpdateProcessorFactory is deprecated, please use 'fieldName' (or another FieldMutatingUpdateProcessorFactory selector option) instead");
+      log.info("Replacing 'fields' init param with (individual) 'fieldName' params");
+      for (Map.Entry<String,String> entry : flst) {
+        args.add("fieldName", entry.getValue());
       }
     }
+    super.init(args);
   }
   
   @Override
-  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
-                                            SolrQueryResponse rsp,
-                                            UpdateRequestProcessor next) {
-    return new UniqFieldsUpdateProcessor(next, fields);
-  }
-  
-  public class UniqFieldsUpdateProcessor extends UpdateRequestProcessor {
-    
-    private final Set<String> fields;
-
-    public UniqFieldsUpdateProcessor(UpdateRequestProcessor next, 
-                                              Set<String> fields) {
-      super(next);
-      this.fields = fields;
-    }
-    
-    @Override
-    public void processAdd(AddUpdateCommand cmd) throws IOException {
-      if(fields != null){
-        SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
-        List<Object> uniqList = new ArrayList<Object>();
-        for (String field : fields) {
-          uniqList.clear();
-          Collection<Object> col = solrInputDocument.getFieldValues(field);
-          if (col != null) {
-            for (Object o : col) {
-              if(!uniqList.contains(o))
-                uniqList.add(o);
-            }
-            solrInputDocument.remove(field);
-            for (Object o : uniqList) {
-              solrInputDocument.addField(field, o);
-            }
-          }    
-        }
+  @SuppressWarnings("unchecked")
+  public Collection pickSubset(Collection values) {
+    Set<Object> uniqs = new HashSet<Object>();
+    List<Object> result = new ArrayList<Object>(values.size());
+    for (Object o : values) {
+      if (!uniqs.contains(o)) {
+        uniqs.add(o);
+        result.add(o);
       }
-      super.processAdd(cmd);
     }
+    return result;
   }
 }
 

Modified: lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml?rev=1518717&r1=1518716&r2=1518717&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml (original)
+++ lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml Thu Aug 29 17:31:17 2013
@@ -429,6 +429,12 @@
     </processor>
   </updateRequestProcessorChain>
 
+  <updateRequestProcessorChain name="uniq-values">
+    <processor class="solr.UniqFieldsUpdateProcessorFactory">
+      <str name="fieldRegex">uniq_.*</str>
+    </processor>
+  </updateRequestProcessorChain>
+
   <updateRequestProcessorChain name="pre-analyzed-simple">
     <processor class="solr.PreAnalyzedUpdateProcessorFactory">
       <str name="fieldName">subject</str>

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java?rev=1518717&r1=1518716&r2=1518717&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java Thu Aug 29 17:31:17 2013
@@ -94,6 +94,28 @@ public class FieldMutatingUpdateProcesso
                  5.0F, d.getField("foo_s").getBoost(), 0.0F);
   }
 
+  public void testUniqValues() throws Exception {
+    final String chain = "uniq-values";
+    SolrInputDocument d = null;
+    d = processAdd(chain,
+                   doc(f("id", "1111"),
+                       f("name", "Hoss", "Man", "Hoss"),
+                       f("uniq_1_s", "Hoss", "Man", "Hoss"),
+                       f("uniq_2_s", "Foo", "Hoss", "Man", "Hoss", "Bar"),
+                       f("uniq_3_s", 5.0F, 23, "string", 5.0F)));
+    
+    assertNotNull(d);
+    
+    assertEquals(Arrays.asList("Hoss", "Man", "Hoss"),
+                 d.getFieldValues("name"));
+    assertEquals(Arrays.asList("Hoss","Man"), 
+                 d.getFieldValues("uniq_1_s"));
+    assertEquals(Arrays.asList("Foo","Hoss","Man","Bar"),
+                 d.getFieldValues("uniq_2_s"));
+    assertEquals(Arrays.asList(5.0F, 23, "string"),
+                 d.getFieldValues("uniq_3_s"));
+  }
+
   public void testTrimFields() throws Exception {
     for (String chain : Arrays.asList("trim-fields", "trim-fields-arr")) {
       SolrInputDocument d = null;