You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2014/10/20 00:45:32 UTC

svn commit: r1633019 - in /lucene/dev/trunk/solr: CHANGES.txt core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java

Author: erick
Date: Sun Oct 19 22:45:31 2014
New Revision: 1633019

URL: http://svn.apache.org/r1633019
Log:
SOLR-5992: add removeregex as an atomic update operation, Thanks Vitaliy

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1633019&r1=1633018&r2=1633019&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Sun Oct 19 22:45:31 2014
@@ -181,6 +181,8 @@ New Features
 * SOLR-6517: CollectionsAPI call REBALANCELEADERS. Used to balance leaders
   across nodes for a particular collection
 
+* SOLR-5992: add "removeregex" as an atomic update operation
+  (Vitaliy Zhovtyuk via Erick Erickson)
 
 Bug Fixes
 ----------------------

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java?rev=1633019&r1=1633018&r2=1633019&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java Sun Oct 19 22:45:31 2014
@@ -26,6 +26,7 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -34,6 +35,8 @@ import java.util.concurrent.ExecutorServ
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.locks.ReentrantLock;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
@@ -1146,6 +1149,10 @@ public class DistributedUpdateProcessor 
               updateField = true;
               doRemove(oldDoc, sif, fieldVal, schema);
               break;
+            case "removeregex":
+              updateField = true;
+              doRemoveRegex(oldDoc, sif, fieldVal);
+              break;
             case "inc":
               updateField = true;
               doInc(oldDoc, schema, sif, fieldVal);
@@ -1225,6 +1232,38 @@ public class DistributedUpdateProcessor 
     return oldSize > existingField.getValueCount();
   }
 
+  /** Drops from oldDoc's field (named as sif) each value whose toString() fully matches a valuePatterns regex. */
+  private void doRemoveRegex(SolrInputDocument oldDoc, SolrInputField sif, Object valuePatterns) {
+    final String fieldName = sif.getName();
+    final SolrInputField current = oldDoc.get(fieldName);
+    if (current != null) { // nothing to do when oldDoc has no such field
+      final Collection<Object> values = current.getValues();
+      final Collection<Object> matched = new HashSet<>();
+      for (Pattern regex : preparePatterns(valuePatterns)) { // patterns are compiled once, up front
+        for (Object candidate : values) {
+          final Matcher matcher = regex.matcher(candidate.toString());
+          if (matcher.matches()) { // whole-value match, not a substring find()
+            matched.add(candidate);
+          }
+        }
+      }
+      values.removeAll(matched);
+      oldDoc.setField(fieldName, values); // written back even when nothing matched
+    }
+  }
+
+  /** Compiles {@code fieldVal} (one regex, or a Collection of them) into Patterns, in iteration order. */
+  private Collection<Pattern> preparePatterns(Object fieldVal) {
+    final Collection<Pattern> patterns = new LinkedHashSet<>(1);
+    // Elements are not guaranteed to be Strings, so each one is compiled from its toString();
+    // a Collection<String> cast would fail with ClassCastException on such input.
+    final Collection<?> rawVals =
+        fieldVal instanceof Collection ? (Collection<?>) fieldVal : Collections.singleton(fieldVal);
+    for (Object rawVal : rawVals) {
+      patterns.add(Pattern.compile(rawVal.toString()));
+    }
+    return patterns;
+  }
 
   @Override
   public void processDelete(DeleteUpdateCommand cmd) throws IOException {

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java?rev=1633019&r1=1633018&r2=1633019&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java Sun Oct 19 22:45:31 2014
@@ -815,6 +815,73 @@ public class AtomicUpdatesTest extends S
     assertQ(req("q", "floatRemove:\"111.111\"", "indent", "true"), "//result[@numFound = '3']");
   }
 
+  /**
+   * Exercises the "removeregex" atomic update with a list of patterns and with a single pattern.
+   */
+  @Test
+  public void testRemoveregex() throws Exception {
+    // Index four documents; "bbb" is present in ids 1, 2 and 21 but not in id 20.
+    SolrInputDocument seedDoc = new SolrInputDocument();
+    seedDoc.setField("id", "1");
+    seedDoc.setField("cat", new String[]{"aaa", "bbb", "ccc", "ccc", "ddd"});
+    assertU(adoc(seedDoc));
+
+    seedDoc = new SolrInputDocument();
+    seedDoc.setField("id", "2");
+    seedDoc.setField("cat", new String[]{"aaa", "bbb", "bbb", "ccc", "ddd"});
+    assertU(adoc(seedDoc));
+
+    seedDoc = new SolrInputDocument();
+    seedDoc.setField("id", "20");
+    seedDoc.setField("cat", new String[]{"aaa", "ccc", "ddd"});
+    assertU(adoc(seedDoc));
+
+    seedDoc = new SolrInputDocument();
+    seedDoc.setField("id", "21");
+    seedDoc.setField("cat", new String[]{"aaa", "bbb", "ddd"});
+    assertU(adoc(seedDoc));
+    assertU(commit());
+
+    assertQ(req("q", "cat:*", "indent", "true"), "//result[@numFound = '4']");
+    assertQ(req("q", "cat:bbb", "indent", "true"), "//result[@numFound = '3']");
+
+    // Patterns given as a list against id 1: ".b." covers "bbb", so the bbb hit count drops to 2.
+    final List<String> firstRegexes = new ArrayList<>();
+    firstRegexes.add(".b.");
+    firstRegexes.add("c+c");
+    final SolrInputDocument firstUpdate = new SolrInputDocument();
+    firstUpdate.setField("id", "1");
+    firstUpdate.setField("cat", ImmutableMap.of("removeregex", firstRegexes)); //behavior when hitting Solr through ZK
+    assertU(adoc(firstUpdate));
+    assertU(commit());
+
+    assertQ(req("q", "cat:*", "indent", "true"), "//result[@numFound = '4']");
+    assertQ(req("q", "cat:bbb", "indent", "true"), "//result[@numFound = '2']");
+
+    // Same operation against id 21: "bb*" covers "bbb", leaving only id 2 with a bbb value.
+    final List<String> secondRegexes = new ArrayList<>();
+    secondRegexes.add("bb*");
+    secondRegexes.add("cc+");
+    final SolrInputDocument secondUpdate = new SolrInputDocument();
+    secondUpdate.setField("id", "21");
+    secondUpdate.setField("cat", ImmutableMap.of("removeregex", secondRegexes)); //behavior when hitting Solr through ZK
+    assertU(adoc(secondUpdate));
+    assertU(commit());
+
+    assertQ(req("q", "cat:*", "indent", "true"), "//result[@numFound = '4']");
+    assertQ(req("q", "cat:bbb", "indent", "true"), "//result[@numFound = '1']");
+
+    // A single pattern passed as a bare String rather than a list; id 1 still keeps a cat value.
+    final SolrInputDocument thirdUpdate = new SolrInputDocument();
+    thirdUpdate.setField("id", "1");
+    thirdUpdate.setField("cat", ImmutableMap.of("removeregex", "a.a")); //behavior when hitting Solr directly
+    assertU(adoc(thirdUpdate));
+    assertU(commit());
+
+    assertQ(req("q", "cat:*", "indent", "true"), "//result[@numFound = '4']");
+    assertQ(req("q", "cat:aaa", "indent", "true"), "//result[@numFound = '3']");
+  }
+
   @Test
   public void testAdd() throws Exception {
     SolrInputDocument doc = new SolrInputDocument();