You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ge...@apache.org on 2020/11/11 17:56:31 UTC

[lucene-solr] branch branch_8x updated: SOLR-14971: Handle atomic-removes on uncommitted docs (#2056)

This is an automated email from the ASF dual-hosted git repository.

gerlowskija pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new 8e9db02  SOLR-14971: Handle atomic-removes on uncommitted docs (#2056)
8e9db02 is described below

commit 8e9db02530d42a45c9ef89d93ea37f340c9a426c
Author: Jason Gerlowski <ge...@apache.org>
AuthorDate: Wed Nov 11 12:28:11 2020 -0500

    SOLR-14971: Handle atomic-removes on uncommitted docs (#2056)
    
    Docs fetched from the update log via RTG look different than docs
    fetched from commits in the index: the types of
    field-values may be different between the two, etc.
    
    This is a problem for atomic add/remove of field values, where matching
    existing values has historically been done by object equals() calls (via
    Collection operations).  This relies on equality checks which don't have
    flexible enough semantics to match values across these different types.
    (For example, `new Long(1).equals(new Integer(1))` returns `false`).
    This was causing some add-distinct and remove operations on
    uncommitted values to silently fail to remove field values.
    
    This commit patches over this by converting between types in the more
    common cases before using the fallback behavior.
---
 solr/CHANGES.txt                                   |   2 +
 .../processor/AtomicUpdateDocumentMerger.java      | 100 +++++-
 .../update/processor/AtomicUpdateJavabinTest.java  | 370 +++++++++++++++++++++
 .../processor/AtomicUpdateRemovalJavabinTest.java  | 134 --------
 .../solr/update/processor/AtomicUpdatesTest.java   |  32 ++
 5 files changed, 489 insertions(+), 149 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f6c6f52..acd0da8 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -38,6 +38,8 @@ Bug Fixes
 
 * SOLR-14961: Fix for deleting zookeeper nodes with same path length. Only the first zk-node was removed. (Michael Aleythe via Mike Drob)
 
+* SOLR-14971: AtomicUpdate 'remove' and 'add-distinct' operations now work on numeric and date fields in uncommitted docs (Jason Gerlowski)
+
 Other Changes
 ---------------------
 
diff --git a/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java b/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java
index 673c4fa..ffc52a7 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java
@@ -21,12 +21,15 @@ import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Date;
 import java.util.HashSet;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Optional;
 import java.util.Set;
+import java.util.function.BiConsumer;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Stream;
@@ -53,6 +56,7 @@ import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.NumericValueFieldType;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.update.AddUpdateCommand;
+import org.apache.solr.util.DateMathParser;
 import org.apache.solr.util.RefCounted;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -189,7 +193,7 @@ public class AtomicUpdateDocumentMerger {
       if (fieldName.equals(uniqueKeyFieldName)
           || fieldName.equals(CommonParams.VERSION_FIELD)
           || fieldName.equals(routeFieldOrNull)) {
-        if (fieldValue instanceof Map ) {
+        if (fieldValue instanceof Map) {
           throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
               "Updating unique key, version or route field is not allowed: " + sdoc.getField(fieldName));
         } else {
@@ -450,9 +454,6 @@ public class AtomicUpdateDocumentMerger {
     final String name = sif.getName();
     SolrInputField existingField = toDoc.get(name);
 
-    // throws exception if field doesn't exist
-    SchemaField sf = schema.getField(name);
-
     Collection<Object> original = existingField != null ?
         existingField.getValues() :
         new ArrayList<>();
@@ -460,16 +461,10 @@ public class AtomicUpdateDocumentMerger {
     int initialSize = original.size();
     if (fieldVal instanceof Collection) {
       for (Object object : (Collection) fieldVal) {
-        Object obj = sf.getType().toNativeType(object);
-        if (!original.contains(obj)) {
-          original.add(obj);
-        }
+        addValueIfDistinct(name, original, object);
       }
     } else {
-      Object object = sf.getType().toNativeType(fieldVal);
-      if (!original.contains(object)) {
-        original.add(object);
-      }
+      addValueIfDistinct(name, original, fieldVal);
     }
 
     if (original.size() > initialSize) { // update only if more are added
@@ -516,7 +511,7 @@ public class AtomicUpdateDocumentMerger {
     SolrInputField existingField = toDoc.get(name);
     if (existingField == null) return;
     @SuppressWarnings({"rawtypes"})
-    final Collection original = existingField.getValues();
+    final Collection<Object> original = existingField.getValues();
     if (fieldVal instanceof Collection) {
       for (Object object : (Collection) fieldVal) {
         removeObj(original, object, name);
@@ -582,11 +577,11 @@ public class AtomicUpdateDocumentMerger {
     return objValues.iterator().next() instanceof SolrDocumentBase;
   }
 
-  private void removeObj(@SuppressWarnings({"rawtypes"})Collection original, Object toRemove, String fieldName) {
+  private void removeObj(Collection<Object> original, Object toRemove, String fieldName) {
     if(isChildDoc(toRemove)) {
       removeChildDoc(original, (SolrInputDocument) toRemove);
     } else {
-      original.remove(getNativeFieldValue(fieldName, toRemove));
+      removeFieldValueWithNumericFudging(fieldName, original, toRemove);
     }
   }
 
@@ -600,6 +595,81 @@ public class AtomicUpdateDocumentMerger {
     }
   }
 
+  private void removeFieldValueWithNumericFudging(String fieldName, @SuppressWarnings({"rawtypes"}) Collection<Object> original, Object toRemove) {
+    if (original.size() == 0) {
+      return;
+    }
+
+    final BiConsumer<Collection<Object>, Object> removePredicate = (coll, existingElement) -> coll.remove(existingElement);
+    modifyCollectionBasedOnFuzzyPresence(fieldName, original, toRemove, removePredicate, null);
+  }
+
+  private void addValueIfDistinct(String fieldName, Collection<Object> original, Object toAdd) {
+    final BiConsumer<Collection<Object>, Object> addPredicate = (coll, newElement) -> coll.add(newElement);
+    modifyCollectionBasedOnFuzzyPresence(fieldName, original, toAdd, null, addPredicate);
+  }
+
+  /**
+   * Modifies a collection based on the (loosely-judged) presence or absence of a specific value
+   *
+   * Several classes of atomic update (notably 'remove' and 'add-distinct') rely on being able to identify whether an
+   * item is already present in a given list of values.  Unfortunately the 'item' being checked for may be of different
+   * types based on the format of the user request and on where the existing document was pulled from (tlog vs index).
+   * As a result atomic updates need a "fuzzy" way of checking presence and equality that is more flexible than
+   * traditional equality checks allow.  This method does light type-checking to catch some of these more common cases
+   * (Long compared against Integers, String compared against Date, etc.), and calls the provided lambda to modify the
+   * field values as necessary.
+   *
+   * @param fieldName the field name involved in this atomic update operation
+   * @param original the list of values currently present in the existing document
+   * @param rawValue a value to be checked for in 'original'
+   * @param ifPresent a function to execute if rawValue was found in 'original'
+   * @param ifAbsent a function to execute if rawValue was not found in 'original'
+   */
+  private void modifyCollectionBasedOnFuzzyPresence(String fieldName, Collection<Object> original, Object rawValue,
+                                                    BiConsumer<Collection<Object>, Object> ifPresent,
+                                                    BiConsumer<Collection<Object>, Object> ifAbsent) {
+    Object nativeValue = getNativeFieldValue(fieldName, rawValue);
+    Optional<Object> matchingValue = findObjectWithTypeFuzziness(original, rawValue, nativeValue);
+    if (matchingValue.isPresent() && ifPresent != null) {
+      ifPresent.accept(original, matchingValue.get());
+    } else if( (!matchingValue.isPresent()) && ifAbsent != null) {
+      ifAbsent.accept(original, rawValue);
+    }
+  }
+
+  private Optional<Object> findObjectWithTypeFuzziness(Collection<Object> original, Object rawValue, Object nativeValue) {
+    if (nativeValue instanceof Double || nativeValue instanceof Float) {
+      final Number nativeAsNumber = (Number) nativeValue;
+      return original.stream().filter(val ->
+              val.equals(rawValue) ||
+                      val.equals(nativeValue) ||
+                      (val instanceof Number && ((Number) val).doubleValue() == nativeAsNumber.doubleValue()) ||
+                      (val instanceof String && val.equals(nativeAsNumber.toString())))
+              .findFirst();
+    } else if (nativeValue instanceof Long || nativeValue instanceof Integer) {
+      final Number nativeAsNumber = (Number) nativeValue;
+      return original.stream().filter(val ->
+              val.equals(rawValue) ||
+                      val.equals(nativeValue) ||
+                      (val instanceof Number && ((Number) val).longValue() == nativeAsNumber.longValue()) ||
+                      (val instanceof String && val.equals(nativeAsNumber.toString())))
+              .findFirst();
+    } else if (nativeValue instanceof Date) {
+      return original.stream().filter(val ->
+              val.equals(rawValue) ||
+                      val.equals(nativeValue) ||
+                      (val instanceof String && DateMathParser.parseMath(null, (String)val).equals(nativeValue)))
+              .findFirst();
+    } else if (original.contains(nativeValue)) {
+      return Optional.of(nativeValue);
+    } else if (original.contains(rawValue)) {
+      return Optional.of(rawValue);
+    } else {
+      return Optional.empty();
+    }
+  }
+
   /**
    *
    * @param doc document to search for
diff --git a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateJavabinTest.java b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateJavabinTest.java
new file mode 100644
index 0000000..758de5f
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateJavabinTest.java
@@ -0,0 +1,370 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update.processor;
+
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.time.Instant;
+import java.util.Collection;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Tests Solr's atomic-update functionality using requests sent through SolrJ using wt=javabin
+ *
+ * {@link AtomicUpdatesTest} covers some of the same functionality, but does so by making xml-based requests.  Recent
+ * changes to Solr have made it possible for the same data sent with different formats to result in different NamedLists
+ * after unmarshalling, so the test duplication is now necessary.  See SOLR-13331 for an example.
+ */
+public class AtomicUpdateJavabinTest extends SolrCloudTestCase {
+  private static final String COMMITTED_DOC_ID = "1";
+  private static final String COMMITTED_DOC_STR_VALUES_ID = "1s";
+  private static final String UNCOMMITTED_DOC_ID = "2";
+  private static final String UNCOMMITTED_DOC_STR_VALUES_ID = "2s";
+  private static final String COLLECTION = "collection1";
+  private static final int NUM_SHARDS = 1;
+  private static final int NUM_REPLICAS = 1;
+  private static final Date DATE_1 = Date.from(Instant.ofEpochSecond(1554243309));
+  private static final String DATE_1_STR = "2019-04-02T22:15:09Z";
+  private static final Date DATE_2 = Date.from(Instant.ofEpochSecond(1554243609));
+  private static final String DATE_2_STR = "2019-04-02T22:20:09Z";
+  private static final Date DATE_3 = Date.from(Instant.ofEpochSecond(1554243909));
+  private static final String DATE_3_STR = "2019-04-02T22:25:09Z";
+
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    configureCluster(1)
+        .addConfig("conf", configset("cloud-dynamic"))
+        .configure();
+
+    CollectionAdminRequest.createCollection(COLLECTION, "conf", NUM_SHARDS, NUM_REPLICAS)
+        .process(cluster.getSolrClient());
+
+    cluster.waitForActiveCollection(COLLECTION, 1, 1);
+  }
+
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+
+    final SolrInputDocument committedDoc = sdoc(
+            "id", COMMITTED_DOC_ID,
+            "title_s", "title_1", "title_s", "title_2",
+            "tv_mv_text", "text_1", "tv_mv_text", "text_2",
+            "count_is", 1, "count_is", 2,
+            "count_md", 1.0, "count_md", 2.0,
+            "timestamps_mdt", DATE_1, "timestamps_mdt", DATE_2);
+    final SolrInputDocument committedStrDoc = sdoc(
+            "id", COMMITTED_DOC_STR_VALUES_ID,
+            "title_s", "title_1", "title_s", "title_2",
+            "tv_mv_text", "text_1", "tv_mv_text", "text_2",
+            "count_is", "1", "count_is", "2",
+            "count_md", "1.0", "count_md", "2.0",
+            "timestamps_mdt", DATE_1_STR, "timestamps_mdt", DATE_2_STR);
+    final UpdateRequest committedRequest = new UpdateRequest()
+            .add(committedDoc)
+            .add(committedStrDoc);
+    committedRequest.commit(cluster.getSolrClient(), COLLECTION);
+
+    // Upload a copy of id:1 that's uncommitted to test how atomic-updates modify values in the tlog
+    // See SOLR-14971 for an example of why this case needs to be tested separately
+    final SolrInputDocument uncommittedDoc = sdoc(
+            "id", UNCOMMITTED_DOC_ID,
+            "title_s", "title_1", "title_s", "title_2",
+            "tv_mv_text", "text_1", "tv_mv_text", "text_2",
+            "count_is", 1, "count_is", 2,
+            "count_md", 1.0, "count_md", 2.0,
+            "timestamps_mdt", DATE_1, "timestamps_mdt", DATE_2);
+    final SolrInputDocument uncommittedStrDoc = sdoc(
+            "id", UNCOMMITTED_DOC_STR_VALUES_ID,
+            "title_s", "title_1", "title_s", "title_2",
+            "tv_mv_text", "text_1", "tv_mv_text", "text_2",
+            "count_is", "1", "count_is", "2",
+            "count_md", "1.0", "count_md", "2.0",
+            "timestamps_mdt", DATE_1_STR, "timestamps_mdt", DATE_2_STR);
+    final UpdateRequest uncommittedRequest = new UpdateRequest()
+            .add(uncommittedDoc)
+            .add(uncommittedStrDoc);
+    uncommittedRequest.process(cluster.getSolrClient(), COLLECTION);
+  }
+
+  @Test
+  public void testAtomicUpdateRemovalOfStrField() throws Exception {
+    ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2");
+    atomicRemoveValueFromField(COMMITTED_DOC_ID, "title_s", "title_1");
+    ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_2");
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2");
+    atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "title_s", "title_1");
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_2");
+  }
+
+  @Test
+  public void testAtomicUpdateRemovalOfTextField() throws Exception {
+    ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
+    atomicRemoveValueFromField(COMMITTED_DOC_ID, "tv_mv_text", "text_1");
+    ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_2");
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
+    atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1");
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_2");
+  }
+
+  @Test
+  public void testAtomicUpdateRemovalOfIntField() throws Exception {
+    ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2);
+    atomicRemoveValueFromField(COMMITTED_DOC_ID, "count_is", 1);
+    ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 2);
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2);
+    atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "count_is", 1);
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 2);
+  }
+
+  @Test
+  public void testAtomicUpdateRemovalOfDoubleField() throws Exception {
+    ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0);
+    atomicRemoveValueFromField(COMMITTED_DOC_ID, "count_md", 1.0);
+    ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 2.0);
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0);
+    atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "count_md", 1.0);
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 2.0);
+  }
+
+  @Test
+  public void testAtomicUpdateRemovalOfDateField() throws Exception {
+    ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
+    atomicRemoveValueFromField(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1);
+    ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_2);
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
+    atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1);
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_2);
+  }
+
+  @Test
+  public void testAtomicUpdateAddDistinctOfDistinctValueOnStrField() throws Exception {
+    ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2");
+    atomicAddDistinctValueToField(COMMITTED_DOC_ID, "title_s", "title_3");
+    ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2", "title_3");
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2");
+    atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "title_s", "title_3");
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2", "title_3");
+  }
+
+  @Test
+  public void testAtomicUpdateAddDistinctOfDuplicateValueOnStrField() throws Exception {
+    ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2");
+    atomicAddDistinctValueToField(COMMITTED_DOC_ID, "title_s", "title_2");
+    ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2");
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2");
+    atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "title_s", "title_2");
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2");
+  }
+
+  @Test
+  public void testAtomicUpdateAddDistinctOfDistinctValueOnTextField() throws Exception {
+    ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
+    atomicAddDistinctValueToField(COMMITTED_DOC_ID, "tv_mv_text", "text_3");
+    ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2", "text_3");
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
+    atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_3");
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2", "text_3");
+  }
+
+  @Test
+  public void testAtomicUpdateAddDistinctOfDuplicateValueOnTextField() throws Exception {
+    ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
+    atomicAddDistinctValueToField(COMMITTED_DOC_ID, "tv_mv_text", "text_2");
+    ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
+    atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_2");
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
+  }
+
+  @Test
+  public void testAtomicUpdateAddDistinctOfDistinctValueOnIntField() throws Exception {
+    ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2);
+    atomicAddDistinctValueToField(COMMITTED_DOC_ID, "count_is", 3);
+    ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2, 3);
+
+    ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
+    atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "count_is", 3);
+    ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2, 3);
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2);
+    atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "count_is", 3);
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2, 3);
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
+    atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 3);
+    ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2, 3);
+  }
+
+  @Test
+  public void testAtomicUpdateAddDistinctOfDuplicateValueOnIntField() throws Exception {
+    ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2);
+    atomicAddDistinctValueToField(COMMITTED_DOC_ID, "count_is", 2);
+    ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2);
+
+    ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
+    atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "count_is", 2);
+    ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2);
+    atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "count_is", 2);
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2);
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
+    atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 2);
+    ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
+  }
+
+  @Test
+  public void testAtomicUpdateAddDistinctOfDistinctValueOnDoubleField() throws Exception {
+    ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0);
+    atomicAddDistinctValueToField(COMMITTED_DOC_ID, "count_md", 3.0);
+    ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0, 3.0);
+
+    ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
+    atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "count_md", 3.0);
+    ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0, 3.0);
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0);
+    atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "count_md", 3.0);
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0, 3.0);
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
+    atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 3.0);
+    ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0, 3.0);
+  }
+
+  @Test
+  public void testAtomicUpdateAddDistinctOfDuplicateValueOnDoubleField() throws Exception {
+    ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0);
+    atomicAddDistinctValueToField(COMMITTED_DOC_ID, "count_md", 2.0);
+    ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0);
+
+    ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
+    atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "count_md", 2.0);
+    ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0);
+    atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "count_md", 2.0);
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0);
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
+    atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 2.0);
+    ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
+  }
+
+  @Test
+  public void testAtomicUpdateAddDistinctOfDistinctValueOnDateField() throws Exception {
+    ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
+    atomicAddDistinctValueToField(COMMITTED_DOC_ID, "timestamps_mdt", DATE_3);
+    ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2, DATE_3);
+
+    ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
+    atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_3);
+    ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2, DATE_3);
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
+    atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_3);
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2, DATE_3);
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
+    atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_3);
+    ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2, DATE_3);
+  }
+
+  @Test
+  public void testAtomicUpdateAddDistinctOfDuplicateValueOnDateField() throws Exception {
+    ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
+    atomicAddDistinctValueToField(COMMITTED_DOC_ID, "timestamps_mdt", DATE_2);
+    ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
+
+    ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
+    atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_2);
+    ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
+    atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_2);
+    ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
+
+    ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
+    atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_2);
+    ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
+  }
+
+  private void atomicRemoveValueFromField(String docId, String fieldName, Object value) throws Exception {
+    final SolrInputDocument doc = new SolrInputDocument();
+    doc.setField("id", docId);
+    Map<String, Object> atomicUpdateRemoval = new HashMap<>(1);
+    atomicUpdateRemoval.put("remove", value);
+    doc.setField(fieldName, atomicUpdateRemoval);
+
+    cluster.getSolrClient().add(COLLECTION, doc);
+  }
+
+  private void atomicAddDistinctValueToField(String docId, String fieldName, Object value) throws Exception {
+    final SolrInputDocument doc = new SolrInputDocument();
+    doc.setField("id", docId);
+    Map<String, Object> atomicUpdateRemoval = new HashMap<>(1);
+    atomicUpdateRemoval.put("add-distinct", value);
+    doc.setField(fieldName, atomicUpdateRemoval);
+
+    cluster.getSolrClient().add(COLLECTION, doc);
+  }
+
+  private void ensureFieldHasValues(String identifyingDocId, String fieldName, Object... expectedValues) throws Exception {
+    final ModifiableSolrParams solrParams = new ModifiableSolrParams();
+    solrParams.set("id", identifyingDocId);
+    QueryRequest request = new QueryRequest(solrParams);
+    request.setPath("/get");
+    final QueryResponse response = request.process(cluster.getSolrClient(), COLLECTION);
+
+    final NamedList<Object> rawResponse = response.getResponse();
+    assertTrue(rawResponse.get("doc") != null);
+    assertTrue(rawResponse.get("doc") instanceof SolrDocument);
+    final SolrDocument doc = (SolrDocument) rawResponse.get("doc");
+    final Collection<Object> valuesAfterUpdate = doc.getFieldValues(fieldName);
+    assertEquals("Expected field to have " + expectedValues.length + " values, but found " + valuesAfterUpdate.size(),
+            expectedValues.length, valuesAfterUpdate.size());
+    for (Object expectedValue: expectedValues) {
+      assertTrue("Expected value [" + expectedValue + "] was not found in field", valuesAfterUpdate.contains(expectedValue));
+    }
+  }
+}
diff --git a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateRemovalJavabinTest.java b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateRemovalJavabinTest.java
deleted file mode 100644
index 61a94f5..0000000
--- a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateRemovalJavabinTest.java
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.solr.update.processor;
-
-import java.time.Instant;
-import java.util.Collection;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.solr.client.solrj.SolrClient;
-import org.apache.solr.client.solrj.SolrQuery;
-import org.apache.solr.client.solrj.request.CollectionAdminRequest;
-import org.apache.solr.client.solrj.request.UpdateRequest;
-import org.apache.solr.client.solrj.response.QueryResponse;
-import org.apache.solr.cloud.SolrCloudTestCase;
-import org.apache.solr.common.SolrDocumentList;
-import org.apache.solr.common.SolrInputDocument;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-/**
- * Tests Solr's atomic-update functionality using requests sent through SolrJ using wt=javabin
- *
- * {@link AtomicUpdatesTest} covers some of the same functionality, but does so by making xml-based requests.  Recent
- * changes to Solr have made it possible for the same data sent with different formats to result in different NamedLists
- * after unmarshalling, so the test duplication is now necessary.  See SOLR-13331 for an example.
- */
-public class AtomicUpdateRemovalJavabinTest extends SolrCloudTestCase {
-  private static final String COLLECTION = "collection1";
-  private static final int NUM_SHARDS = 1;
-  private static final int NUM_REPLICAS = 1;
-  private static final int MAX_SHARDS_PER_NODE = 1;
-  private static final Date DATE_1 = new Date();
-  private static final Date DATE_2 = Date.from(Instant.ofEpochSecond(1554243909));
-
-  @BeforeClass
-  public static void setupCluster() throws Exception {
-    configureCluster(1)
-        .addConfig("conf", configset("cloud-dynamic"))
-        .configure();
-
-    CollectionAdminRequest.createCollection(COLLECTION, "conf", NUM_SHARDS, NUM_REPLICAS)
-        .setMaxShardsPerNode(MAX_SHARDS_PER_NODE)
-        .process(cluster.getSolrClient());
-
-    cluster.waitForActiveCollection(COLLECTION, 1, 1);
-
-    final SolrInputDocument doc1 = sdoc(
-        "id", "1",
-        "title_s", "title_1", "title_s", "title_2",
-        "tv_mv_text", "text_1", "tv_mv_text", "text_2",
-        "count_is", 1, "count_is", 2,
-        "count_md", 1.0, "count_md", 2.0,
-        "timestamps_mdt", DATE_1, "timestamps_mdt", DATE_2);
-    final UpdateRequest req = new UpdateRequest()
-        .add(doc1);
-    req.commit(cluster.getSolrClient(), COLLECTION);
-  }
-
-  @Test
-  public void testAtomicUpdateRemovalOfStrField() throws Exception {
-    ensureFieldHasValues("1", "title_s", "title_1", "title_2");
-    atomicRemoveValueFromField("1", "title_s", "title_1");
-    ensureFieldHasValues("1", "title_s", "title_2");
-  }
-
-  @Test
-  public void testAtomicUpdateRemovalOfTextField() throws Exception {
-    ensureFieldHasValues("1", "tv_mv_text", "text_1", "text_2");
-    atomicRemoveValueFromField("1", "tv_mv_text", "text_1");
-    ensureFieldHasValues("1", "tv_mv_text", "text_2");
-  }
-
-  @Test
-  public void testAtomicUpdateRemovalOfIntField() throws Exception {
-    ensureFieldHasValues("1", "count_is", 1, 2);
-    atomicRemoveValueFromField("1", "count_is", 1);
-    ensureFieldHasValues("1", "count_is", 2);
-  }
-
-  @Test
-  public void testAtomicUpdateRemovalOfDoubleField() throws Exception {
-    ensureFieldHasValues("1", "count_md", 1.0, 2.0);
-    atomicRemoveValueFromField("1", "count_md", 1.0);
-    ensureFieldHasValues("1", "count_md", 2.0);
-  }
-
-  @Test
-  public void testAtomicUpdateRemovalOfDateField() throws Exception {
-    ensureFieldHasValues("1", "timestamps_mdt", DATE_1, DATE_2);
-    atomicRemoveValueFromField("1", "timestamps_mdt", DATE_1);
-    ensureFieldHasValues("1", "timestamps_mdt", DATE_2);
-  }
-
-  private void atomicRemoveValueFromField(String docId, String fieldName, Object value) throws Exception {
-    final SolrInputDocument doc = new SolrInputDocument();
-    doc.setField("id", docId);
-    Map<String, Object> atomicUpdateRemoval = new HashMap<>(1);
-    atomicUpdateRemoval.put("remove", value);
-    doc.setField(fieldName, atomicUpdateRemoval);
-
-    cluster.getSolrClient().add(COLLECTION, doc);
-    cluster.getSolrClient().commit(COLLECTION);
-  }
-
-  private void ensureFieldHasValues(String identifyingDocId, String fieldName, Object... expectedValues) throws Exception {
-    final SolrClient client = cluster.getSolrClient();
-
-    final QueryResponse response = client.query(COLLECTION, new SolrQuery("id:" + identifyingDocId));
-    final SolrDocumentList docs = response.getResults();
-    assertEquals(1, docs.getNumFound());
-    final Collection<Object> valuesAfterUpdate = docs.get(0).getFieldValues(fieldName);
-    assertEquals(expectedValues.length, valuesAfterUpdate.size());
-    for (Object expectedValue: expectedValues) {
-      assertTrue("Expected value [" + expectedValue + "] was not found in field", valuesAfterUpdate.contains(expectedValue));
-    }
-  }
-}
diff --git a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java
index 5a15e10..54e9f9d 100644
--- a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java
+++ b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java
@@ -181,6 +181,22 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 {
 
     assertQ(req("q", "intRemove:[* TO *]", "indent", "true"), "//result[@numFound = '4']");
     assertQ(req("q", "intRemove:111", "indent", "true"), "//result[@numFound = '3']");
+
+    // Test that mv int fields can have values removed prior to being committed to index (see SOLR-14971)
+    doc = new SolrInputDocument();
+    doc.setField("id", "4242");
+    doc.setField("values_is", new String[] {"111", "222", "333"});
+    assertU(adoc(doc));
+
+    doc = new SolrInputDocument();
+    doc.setField("id", "4242");
+    doc.setField("values_is", ImmutableMap.of("remove", 111));
+    assertU(adoc(doc));
+    assertU(commit());
+
+    assertQ(req("q", "values_is:111", "indent", "true"), "//result[@numFound = '0']");
+    assertQ(req("q", "values_is:222", "indent", "true"), "//result[@numFound = '1']");
+    assertQ(req("q", "values_is:333", "indent", "true"), "//result[@numFound = '1']");
   }
 
 
@@ -251,6 +267,22 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 {
 
     assertQ(req("q", "intRemove:[* TO *]", "indent", "true"), "//result[@numFound = '4']");
     assertQ(req("q", "intRemove:111", "indent", "true"), "//result[@numFound = '3']");
+
+    // Test that mv int fields can have values removed prior to being committed to index (see SOLR-14971)
+    doc = new SolrInputDocument();
+    doc.setField("id", "4242");
+    doc.setField("values_is", new Integer[] {111, 222, 333});
+    assertU(adoc(doc));
+
+    doc = new SolrInputDocument();
+    doc.setField("id", "4242");
+    doc.setField("values_is", ImmutableMap.of("remove", 111));
+    assertU(adoc(doc));
+    assertU(commit());
+
+    assertQ(req("q", "values_is:111", "indent", "true"), "//result[@numFound = '0']");
+    assertQ(req("q", "values_is:222", "indent", "true"), "//result[@numFound = '1']");
+    assertQ(req("q", "values_is:333", "indent", "true"), "//result[@numFound = '1']");
   }
 
   @Test