You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2013/08/29 19:31:17 UTC
svn commit: r1518717 - in /lucene/dev/trunk/solr: ./
core/src/java/org/apache/solr/update/processor/
core/src/test-files/solr/collection1/conf/
core/src/test/org/apache/solr/update/processor/
Author: hossman
Date: Thu Aug 29 17:31:17 2013
New Revision: 1518717
URL: http://svn.apache.org/r1518717
Log:
SOLR-4249: UniqFieldsUpdateProcessorFactory now extends FieldMutatingUpdateProcessorFactory and supports all of its selector options
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java
lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml
lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1518717&r1=1518716&r2=1518717&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Thu Aug 29 17:31:17 2013
@@ -71,6 +71,11 @@ Upgrading from Solr 4.4.0
XXXXX and delete the ones that you do not wish to use. See SOLR-4953 &
SOLR-5108 for more details.
+* The UniqFieldsUpdateProcessorFactory has been improved to support all of the
+ FieldMutatingUpdateProcessorFactory selector options. The <lst name="fields">
+ init param option is now deprecated and should be replaced with the more standard
+ <arr name="fieldName">. See SOLR-4249 for more details.
+
Detailed Change List
----------------------
@@ -103,6 +108,11 @@ New Features
* SOLR-5182: Add NoOpRegenerator, a regenerator for custom per-segment caches
where items are preserved across commits. (Robert Muir)
+* SOLR-4249: UniqFieldsUpdateProcessorFactory now extends
+ FieldMutatingUpdateProcessorFactory and supports all of its selector options. Use
+ of the "fields" init param is now deprecated in favor of "fieldName" (hossman)
+
+
Bug Fixes
----------------------
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java?rev=1518717&r1=1518716&r2=1518717&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java Thu Aug 29 17:31:17 2013
@@ -23,6 +23,9 @@ import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import java.util.Map;
+
+import org.apache.solr.core.SolrCore;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.util.NamedList;
@@ -30,77 +33,68 @@ import org.apache.solr.request.SolrQuery
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
- * A non-duplicate processor. Removes duplicates in the specified fields.
- *
- * <pre class="prettyprint" >
- * <updateRequestProcessorChain name="uniq-fields">
- * <processor class="org.apache.solr.update.processor.UniqFieldsUpdateProcessorFactory">
- * <lst name="fields">
- * <str>uniq</str>
- * <str>uniq2</str>
- * <str>uniq3</str>
- * </lst>
- * </processor>
- * <processor class="solr.RunUpdateProcessorFactory" />
- * </updateRequestProcessorChain></pre>
+ * Removes duplicate values found in fields matching the specified conditions.
+ * The existing field values are iterated in order, and values are removed when
+ * they are equal to a value that has already been seen for this field.
+ * <p>
+ * By default this processor matches no fields.
+ * </p>
*
+ * <p>
+ * In the example configuration below, if a document initially contains the values
+ * <code>"Steve","Lucy","Jim","Steve","Alice","Bob","Alice"</code> in a field named
+ * <code>foo_uniq</code> then using this processor will result in the final list of
+ * field values being <code>"Steve","Lucy","Jim","Alice","Bob"</code>
+ * </p>
+ * <pre class="prettyprint">
+ * <processor class="solr.UniqFieldsUpdateProcessorFactory">
+ * <str name="fieldRegex">.*_uniq</str>
+ * </processor>
+ * </pre>
*/
-public class UniqFieldsUpdateProcessorFactory extends UpdateRequestProcessorFactory {
+public class UniqFieldsUpdateProcessorFactory extends FieldValueSubsetUpdateProcessorFactory {
+
+ public final static Logger log = LoggerFactory.getLogger(UniqFieldsUpdateProcessorFactory.class);
- private Set<String> fields;
+ @Override
+ public FieldMutatingUpdateProcessor.FieldNameSelector
+ getDefaultSelector(final SolrCore core) {
+
+ return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
+ }
@SuppressWarnings("unchecked")
@Override
public void init(@SuppressWarnings("rawtypes") NamedList args) {
- NamedList<String> flst = (NamedList<String>)args.get("fields");
+ // legacy init param support, will be removed in 5.0
+ // no idea why this was ever implemented as <lst>; it should have just been <arr>
+ NamedList<String> flst = (NamedList<String>) args.remove("fields");
if(flst != null){
- fields = new HashSet<String>();
- for(int i = 0; i < flst.size(); i++){
- fields.add(flst.getVal(i));
+ log.warn("Use of the 'fields' init param in UniqFieldsUpdateProcessorFactory is deprecated, please use 'fieldName' (or another FieldMutatingUpdateProcessorFactory selector option) instead");
+ log.info("Replacing 'fields' init param with (individual) 'fieldName' params");
+ for (Map.Entry<String,String> entry : flst) {
+ args.add("fieldName", entry.getValue());
}
}
+ super.init(args);
}
@Override
- public UpdateRequestProcessor getInstance(SolrQueryRequest req,
- SolrQueryResponse rsp,
- UpdateRequestProcessor next) {
- return new UniqFieldsUpdateProcessor(next, fields);
- }
-
- public class UniqFieldsUpdateProcessor extends UpdateRequestProcessor {
-
- private final Set<String> fields;
-
- public UniqFieldsUpdateProcessor(UpdateRequestProcessor next,
- Set<String> fields) {
- super(next);
- this.fields = fields;
- }
-
- @Override
- public void processAdd(AddUpdateCommand cmd) throws IOException {
- if(fields != null){
- SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
- List<Object> uniqList = new ArrayList<Object>();
- for (String field : fields) {
- uniqList.clear();
- Collection<Object> col = solrInputDocument.getFieldValues(field);
- if (col != null) {
- for (Object o : col) {
- if(!uniqList.contains(o))
- uniqList.add(o);
- }
- solrInputDocument.remove(field);
- for (Object o : uniqList) {
- solrInputDocument.addField(field, o);
- }
- }
- }
+ @SuppressWarnings("unchecked")
+ public Collection pickSubset(Collection values) {
+ Set<Object> uniqs = new HashSet<Object>();
+ List<Object> result = new ArrayList<Object>(values.size());
+ for (Object o : values) {
+ if (!uniqs.contains(o)) {
+ uniqs.add(o);
+ result.add(o);
}
- super.processAdd(cmd);
}
+ return result;
}
}
Modified: lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml?rev=1518717&r1=1518716&r2=1518717&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml (original)
+++ lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml Thu Aug 29 17:31:17 2013
@@ -429,6 +429,12 @@
</processor>
</updateRequestProcessorChain>
+ <updateRequestProcessorChain name="uniq-values">
+ <processor class="solr.UniqFieldsUpdateProcessorFactory">
+ <str name="fieldRegex">uniq_.*</str>
+ </processor>
+ </updateRequestProcessorChain>
+
<updateRequestProcessorChain name="pre-analyzed-simple">
<processor class="solr.PreAnalyzedUpdateProcessorFactory">
<str name="fieldName">subject</str>
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java?rev=1518717&r1=1518716&r2=1518717&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java Thu Aug 29 17:31:17 2013
@@ -94,6 +94,28 @@ public class FieldMutatingUpdateProcesso
5.0F, d.getField("foo_s").getBoost(), 0.0F);
}
+ public void testUniqValues() throws Exception {
+ final String chain = "uniq-values";
+ SolrInputDocument d = null;
+ d = processAdd(chain,
+ doc(f("id", "1111"),
+ f("name", "Hoss", "Man", "Hoss"),
+ f("uniq_1_s", "Hoss", "Man", "Hoss"),
+ f("uniq_2_s", "Foo", "Hoss", "Man", "Hoss", "Bar"),
+ f("uniq_3_s", 5.0F, 23, "string", 5.0F)));
+
+ assertNotNull(d);
+
+ assertEquals(Arrays.asList("Hoss", "Man", "Hoss"),
+ d.getFieldValues("name"));
+ assertEquals(Arrays.asList("Hoss","Man"),
+ d.getFieldValues("uniq_1_s"));
+ assertEquals(Arrays.asList("Foo","Hoss","Man","Bar"),
+ d.getFieldValues("uniq_2_s"));
+ assertEquals(Arrays.asList(5.0F, 23, "string"),
+ d.getFieldValues("uniq_3_s"));
+ }
+
public void testTrimFields() throws Exception {
for (String chain : Arrays.asList("trim-fields", "trim-fields-arr")) {
SolrInputDocument d = null;