You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2014/10/20 02:32:16 UTC
svn commit: r1633024 - in /lucene/dev/branches/branch_5x: ./ solr/
solr/CHANGES.txt solr/core/
solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java
Author: erick
Date: Mon Oct 20 00:32:16 2014
New Revision: 1633024
URL: http://svn.apache.org/r1633024
Log:
SOLR-5992: add removeregex as an atomic update operation, Thanks Vitaliy
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/solr/ (props changed)
lucene/dev/branches/branch_5x/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/solr/core/ (props changed)
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java
Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1633024&r1=1633023&r2=1633024&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Mon Oct 20 00:32:16 2014
@@ -154,6 +154,8 @@ New Features
* SOLR-6517: CollectionsAPI call REBALANCELEADERS. Used to balance leaders
across nodes for a particular collection
+* SOLR-5992: add "removeregex" as an atomic update operation
+ (Vitaliy Zhovtyuk via Erick Erickson)
Bug Fixes
----------------------
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java?rev=1633024&r1=1633023&r2=1633024&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java Mon Oct 20 00:32:16 2014
@@ -26,6 +26,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -34,6 +35,8 @@ import java.util.concurrent.ExecutorServ
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.ReentrantLock;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@@ -1146,6 +1149,10 @@ public class DistributedUpdateProcessor
updateField = true;
doRemove(oldDoc, sif, fieldVal, schema);
break;
+ case "removeregex":
+ updateField = true;
+ doRemoveRegex(oldDoc, sif, fieldVal);
+ break;
case "inc":
updateField = true;
doInc(oldDoc, schema, sif, fieldVal);
@@ -1225,6 +1232,38 @@ public class DistributedUpdateProcessor
return oldSize > existingField.getValueCount();
}
+ /**
+  * Applies a "removeregex" atomic update: removes from the stored document every value of the
+  * target field whose string form is matched in full by at least one of the given patterns.
+  * Does nothing when the document has no field of that name.
+  *
+  * @param oldDoc        the document being updated; its field is rewritten in place
+  * @param sif           the incoming update field; only its name is used here
+  * @param valuePatterns a single regular expression or a Collection of them
+  */
+ private void doRemoveRegex(SolrInputDocument oldDoc, SolrInputField sif, Object valuePatterns) {
+   final String name = sif.getName();
+   final SolrInputField existingField = oldDoc.get(name);
+   if (existingField == null) {
+     return;
+   }
+   final Collection<Object> original = existingField.getValues();
+   // Compile up front so a malformed pattern is still reported even if nothing can match.
+   final Collection<Pattern> patterns = preparePatterns(valuePatterns);
+   if (original == null) {
+     // NOTE(review): assumes getValues() can return null for a field without a value;
+     // guarding here avoids a NullPointerException in the loop below -- confirm.
+     return;
+   }
+   final Collection<Object> valueToRemove = new HashSet<>();
+   for (Object value : original) {
+     if (value == null) {
+       continue; // a null entry has no string form to match against
+     }
+     final String text = value.toString(); // computed once per value, not once per pattern
+     for (Pattern pattern : patterns) {
+       // matches() requires the whole value to match, not just a substring
+       if (pattern.matcher(text).matches()) {
+         valueToRemove.add(value);
+         break; // one matching pattern is enough to remove the value
+       }
+     }
+   }
+   original.removeAll(valueToRemove);
+   oldDoc.setField(name, original);
+ }
+
+ /**
+  * Compiles the argument of a "removeregex" operation into regular expressions.
+  *
+  * @param fieldVal either a single pattern or a Collection of patterns; every pattern is
+  *                 converted with toString() before compilation, so elements need not be Strings
+  * @return the compiled patterns, in the order they were supplied
+  * @throws java.util.regex.PatternSyntaxException if any pattern is not a valid regular expression
+  */
+ private Collection<Pattern> preparePatterns(Object fieldVal) {
+   final Collection<Pattern> patterns = new LinkedHashSet<>(1);
+   if (fieldVal instanceof Collection) {
+     // Iterate as Collection<?> and stringify each element: an unchecked cast to
+     // Collection<String> would throw ClassCastException on the first non-String element,
+     // whereas the single-value branch below already accepts any Object via toString().
+     for (Object patternVal : (Collection<?>) fieldVal) {
+       patterns.add(Pattern.compile(patternVal.toString()));
+     }
+   } else {
+     patterns.add(Pattern.compile(fieldVal.toString()));
+   }
+   return patterns;
+ }
@Override
public void processDelete(DeleteUpdateCommand cmd) throws IOException {
Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java?rev=1633024&r1=1633023&r2=1633024&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java Mon Oct 20 00:32:16 2014
@@ -815,6 +815,73 @@ public class AtomicUpdatesTest extends S
assertQ(req("q", "floatRemove:\"111.111\"", "indent", "true"), "//result[@numFound = '3']");
}
+ /**
+  * Checks the "removeregex" atomic update against a multi-valued "cat" field, once with a
+  * list of patterns and once with a single pattern string.
+  */
+ @Test
+ public void testRemoveregex() throws Exception {
+   // Seed four documents whose "cat" values overlap.
+   SolrInputDocument firstSeed = new SolrInputDocument();
+   firstSeed.setField("id", "1");
+   firstSeed.setField("cat", new String[]{"aaa", "bbb", "ccc", "ccc", "ddd"});
+   assertU(adoc(firstSeed));
+
+   SolrInputDocument secondSeed = new SolrInputDocument();
+   secondSeed.setField("id", "2");
+   secondSeed.setField("cat", new String[]{"aaa", "bbb", "bbb", "ccc", "ddd"});
+   assertU(adoc(secondSeed));
+
+   SolrInputDocument thirdSeed = new SolrInputDocument();
+   thirdSeed.setField("id", "20");
+   thirdSeed.setField("cat", new String[]{"aaa", "ccc", "ddd"});
+   assertU(adoc(thirdSeed));
+
+   SolrInputDocument fourthSeed = new SolrInputDocument();
+   fourthSeed.setField("id", "21");
+   fourthSeed.setField("cat", new String[]{"aaa", "bbb", "ddd"});
+   assertU(adoc(fourthSeed));
+
+   assertU(commit());
+
+   // Baseline: all four documents have a "cat" value, three of them contain "bbb".
+   assertQ(req("q", "cat:*", "indent", "true"), "//result[@numFound = '4']");
+   assertQ(req("q", "cat:bbb", "indent", "true"), "//result[@numFound = '3']");
+
+   // Patterns passed as a list (noted in the original test as the shape seen when
+   // hitting Solr through ZK): strip "bbb" and "ccc" from document 1.
+   List<String> patternsForDocOne = new ArrayList<>();
+   patternsForDocOne.add(".b.");
+   patternsForDocOne.add("c+c");
+   SolrInputDocument listUpdateDocOne = new SolrInputDocument();
+   listUpdateDocOne.setField("id", "1");
+   listUpdateDocOne.setField("cat", ImmutableMap.of("removeregex", patternsForDocOne));
+   assertU(adoc(listUpdateDocOne));
+   assertU(commit());
+
+   assertQ(req("q", "cat:*", "indent", "true"), "//result[@numFound = '4']");
+   assertQ(req("q", "cat:bbb", "indent", "true"), "//result[@numFound = '2']");
+
+   // Same list form against document 21, using quantifier-style patterns.
+   List<String> patternsForDocTwentyOne = new ArrayList<>();
+   patternsForDocTwentyOne.add("bb*");
+   patternsForDocTwentyOne.add("cc+");
+   SolrInputDocument listUpdateDocTwentyOne = new SolrInputDocument();
+   listUpdateDocTwentyOne.setField("id", "21");
+   listUpdateDocTwentyOne.setField("cat", ImmutableMap.of("removeregex", patternsForDocTwentyOne));
+   assertU(adoc(listUpdateDocTwentyOne));
+   assertU(commit());
+
+   assertQ(req("q", "cat:*", "indent", "true"), "//result[@numFound = '4']");
+   assertQ(req("q", "cat:bbb", "indent", "true"), "//result[@numFound = '1']");
+
+   // Pattern passed as a bare string (noted in the original test as the shape seen when
+   // hitting Solr directly): strip "aaa" from document 1.
+   SolrInputDocument singlePatternUpdate = new SolrInputDocument();
+   singlePatternUpdate.setField("id", "1");
+   singlePatternUpdate.setField("cat", ImmutableMap.of("removeregex", "a.a"));
+   assertU(adoc(singlePatternUpdate));
+   assertU(commit());
+
+   assertQ(req("q", "cat:*", "indent", "true"), "//result[@numFound = '4']");
+   assertQ(req("q", "cat:aaa", "indent", "true"), "//result[@numFound = '3']");
+ }
+
@Test
public void testAdd() throws Exception {
SolrInputDocument doc = new SolrInputDocument();