You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by md...@apache.org on 2022/04/04 17:30:50 UTC

[solr] 01/03: SOLR-16097 Batch add docs when starting tests (#744)

This is an automated email from the ASF dual-hosted git repository.

mdrob pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git

commit 912d9157271fdec2b22261ed086eaeea1b4079f5
Author: Mike Drob <md...@apache.org>
AuthorDate: Wed Mar 30 09:40:59 2022 -0500

    SOLR-16097 Batch add docs when starting tests (#744)
    
    Co-authored-by: Christine Poerschke <cp...@apache.org>
---
 .../apache/solr/cloud/DistribCursorPagingTest.java | 21 ++++-----
 .../component/DistributedFacetPivotLargeTest.java  |  4 +-
 .../TestDistributedStatsComponentCardinality.java  | 51 +++++++++++++---------
 .../test/org/apache/solr/update/PeerSyncTest.java  | 32 ++++++--------
 .../solr/update/TestInPlaceUpdatesDistrib.java     |  2 +-
 .../apache/solr/BaseDistributedSearchTestCase.java | 49 ++++++++++++++++++---
 .../cloud/AbstractBasicDistributedZkTestBase.java  |  2 +-
 .../solr/cloud/AbstractFullDistribZkTestBase.java  |  4 +-
 8 files changed, 102 insertions(+), 63 deletions(-)

diff --git a/solr/core/src/test/org/apache/solr/cloud/DistribCursorPagingTest.java b/solr/core/src/test/org/apache/solr/cloud/DistribCursorPagingTest.java
index 47ec9c34459..b26644009f6 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DistribCursorPagingTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DistribCursorPagingTest.java
@@ -28,6 +28,7 @@ import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.function.Consumer;
+import java.util.stream.IntStream;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.lucene.util.SentinelIntSet;
@@ -41,7 +42,6 @@ import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.CursorMarkParams;
 import org.apache.solr.common.params.GroupParams;
@@ -581,13 +581,10 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
 
     // start with a smallish number of documents, and test that we can do a full walk using a
     // sort on *every* field in the schema...
-
-    List<SolrInputDocument> initialDocs = new ArrayList<>();
-    for (int i = 1; i <= numInitialDocs; i++) {
-      SolrInputDocument doc = CursorPagingTest.buildRandomDocument(i);
-      initialDocs.add(doc);
-      indexDoc(doc);
-    }
+    indexDocs(
+        IntStream.rangeClosed(1, numInitialDocs)
+            .mapToObj(CursorPagingTest::buildRandomDocument)
+            .iterator());
     commit();
 
     for (String f : allFieldNames) {
@@ -618,10 +615,10 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
     }
 
     // now add a lot more docs, and test a handful of randomized multi-level sorts
-    for (int i = numInitialDocs + 1; i <= totalDocs; i++) {
-      SolrInputDocument doc = CursorPagingTest.buildRandomDocument(i);
-      indexDoc(doc);
-    }
+    indexDocs(
+        IntStream.rangeClosed(numInitialDocs + 1, totalDocs)
+            .mapToObj(CursorPagingTest::buildRandomDocument)
+            .iterator());
     commit();
 
     final int numRandomSorts = atLeast(3);
diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java
index ad2df43b775..e3110ec1951 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java
@@ -1574,13 +1574,13 @@ public class DistributedFacetPivotLargeTest extends BaseDistributedSearchTestCas
    * Builds up a SolrInputDocument using the specified fields, then adds it to the specified client
    * as well as the control client
    *
-   * @see #indexDoc(org.apache.solr.client.solrj.SolrClient,SolrParams,SolrInputDocument...)
+   * @see #indexDoc(SolrClient, SolrParams, SolrInputDocument)
    * @see #sdoc
    */
   private void addPivotDoc(SolrClient client, Object... fields)
       throws IOException, SolrServerException {
 
-    indexDoc(client, params(), sdoc(fields));
+    indexDocs(client, null, List.of(sdoc(fields)));
   }
 
   private int docNumber = 0;
diff --git a/solr/core/src/test/org/apache/solr/handler/component/TestDistributedStatsComponentCardinality.java b/solr/core/src/test/org/apache/solr/handler/component/TestDistributedStatsComponentCardinality.java
index 4262346b20b..b41cfc02633 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/TestDistributedStatsComponentCardinality.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/TestDistributedStatsComponentCardinality.java
@@ -22,14 +22,18 @@ import java.lang.invoke.MethodHandles;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.stream.IntStream;
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.lucene.util.TestUtil;
 import org.apache.solr.BaseDistributedSearchTestCase;
 import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
 import org.apache.solr.client.solrj.response.FieldStatsInfo;
 import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.util.LogLevel;
@@ -77,27 +81,32 @@ public class TestDistributedStatsComponentCardinality extends BaseDistributedSea
     log.info("Building an index of {} docs", NUM_DOCS);
 
     // we want a big spread in the long values we use, decrement by BIG_PRIME as we index
-    long longValue = MAX_LONG;
-
-    for (int i = 1; i <= NUM_DOCS; i++) {
-      // with these values, we know that every doc indexed has a unique value in all of the
-      // fields we will compute cardinality against.
-      // which means the number of docs matching a query is the true cardinality for each field
-
-      final String strValue = "s" + longValue;
-      indexDoc(
-          sdoc(
-              "id", "" + i,
-              "int_i", "" + i,
-              "int_i_prehashed_l", "" + HASHER.hashInt(i).asLong(),
-              "long_l", "" + longValue,
-              "long_l_prehashed_l", "" + HASHER.hashLong(longValue).asLong(),
-              "string_s", strValue,
-              "string_s_prehashed_l",
-                  "" + HASHER.hashString(strValue, StandardCharsets.UTF_8).asLong()));
-
-      longValue -= BIG_PRIME;
-    }
+    final AtomicLong longValue = new AtomicLong(MAX_LONG);
+
+    Iterator<SolrInputDocument> docs =
+        IntStream.rangeClosed(1, NUM_DOCS)
+            .mapToObj(
+                i -> {
+                  long currentLong = longValue.getAndAccumulate(BIG_PRIME, (x, y) -> x - y);
+
+                  final String strValue = "s" + currentLong;
+
+                  // with these values, we know that every doc indexed has a unique value in all of
+                  // the fields we will compute cardinality against. which means the number of docs
+                  // matching a query is the true cardinality for each field
+                  return sdoc(
+                      "id", Integer.toString(i),
+                      "int_i", Integer.toString(i),
+                      "int_i_prehashed_l", Long.toString(HASHER.hashInt(i).asLong()),
+                      "long_l", Long.toString(currentLong),
+                      "long_l_prehashed_l", Long.toString(HASHER.hashLong(currentLong).asLong()),
+                      "string_s", strValue,
+                      "string_s_prehashed_l",
+                          Long.toString(
+                              HASHER.hashString(strValue, StandardCharsets.UTF_8).asLong()));
+                })
+            .iterator();
+    indexDocs(docs);
 
     commit();
   }
diff --git a/solr/core/src/test/org/apache/solr/update/PeerSyncTest.java b/solr/core/src/test/org/apache/solr/update/PeerSyncTest.java
index 3e5e03faada..ba3a7ba58bc 100644
--- a/solr/core/src/test/org/apache/solr/update/PeerSyncTest.java
+++ b/solr/core/src/test/org/apache/solr/update/PeerSyncTest.java
@@ -20,6 +20,7 @@ import static org.apache.solr.update.processor.DistributingUpdateProcessorFactor
 import static org.hamcrest.core.StringContains.containsString;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.LinkedHashSet;
@@ -87,7 +88,7 @@ public class PeerSyncTest extends BaseDistributedSearchTestCase {
     SolrClient client1 = clients.get(1);
     SolrClient client2 = clients.get(2);
 
-    long v = 0;
+    int v = 0;
     add(client0, seenLeader, sdoc("id", "1", "_version_", ++v));
 
     // this fails because client0 has no context (i.e. no updates of its own to judge if applying
@@ -213,26 +214,23 @@ public class PeerSyncTest extends BaseDistributedSearchTestCase {
 
     // now lets check fingerprinting causes appropriate fails
     v = 4000;
-    add(client0, seenLeader, sdoc("id", Integer.toString((int) v), "_version_", v));
+    add(client0, seenLeader, sdoc("id", Integer.toString(v), "_version_", v));
     docsAdded.add(4000);
     int toAdd = numVersions + 10;
-    for (int i = 0; i < toAdd; i++) {
-      add(
-          client0,
-          seenLeader,
-          sdoc("id", Integer.toString((int) v + i + 1), "_version_", v + i + 1));
-      add(
-          client1,
-          seenLeader,
-          sdoc("id", Integer.toString((int) v + i + 1), "_version_", v + i + 1));
-      docsAdded.add((int) v + i + 1);
+
+    List<SolrInputDocument> additionalDocs = new ArrayList<>(toAdd);
+    for (int i = v + 1; i < v + toAdd + 1; i++) {
+      additionalDocs.add(sdoc("id", Integer.toString(i), "_version_", i));
+      docsAdded.add(i);
     }
+    add(client0, seenLeader, additionalDocs);
+    add(client1, seenLeader, additionalDocs);
 
     // client0 now has an additional add beyond our window and the fingerprint should cause this to
     // fail
     assertSync(client1, numVersions, false, shardsArr[0]);
 
-    // if we turn of fingerprinting, it should succeed
+    // if we turn off fingerprinting, it should succeed
     System.setProperty("solr.disableFingerprint", "true");
     try {
       assertSync(client1, numVersions, true, shardsArr[0]);
@@ -241,15 +239,13 @@ public class PeerSyncTest extends BaseDistributedSearchTestCase {
     }
 
     // lets add the missing document and verify that order doesn't matter
-    add(client1, seenLeader, sdoc("id", Integer.toString((int) v), "_version_", v));
+    add(client1, seenLeader, sdoc("id", Integer.toString(v), "_version_", v));
     assertSync(client1, numVersions, true, shardsArr[0]);
 
     // lets do some overwrites to ensure that repeated updates and maxDoc don't matter
     for (int i = 0; i < 10; i++) {
-      add(
-          client0,
-          seenLeader,
-          sdoc("id", Integer.toString((int) v + i + 1), "_version_", v + i + 1));
+      // add individually instead of in batch to create more writes
+      add(client0, seenLeader, sdoc("id", Integer.toString(v + i + 1), "_version_", v + i + 1));
     }
     assertSync(client1, numVersions, true, shardsArr[0]);
 
diff --git a/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdatesDistrib.java b/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdatesDistrib.java
index 4dd45e6eaa6..e7b3408cad6 100644
--- a/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdatesDistrib.java
+++ b/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdatesDistrib.java
@@ -614,7 +614,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
     nonexistentDocUpdate.setField("id", 2);
     nonexistentDocUpdate.setField("inplace_updatable_float", map("set", "50"));
 
-    SolrInputDocument docs[] = new SolrInputDocument[] {existingDocUpdate, nonexistentDocUpdate};
+    List<SolrInputDocument> docs = List.of(existingDocUpdate, nonexistentDocUpdate);
 
     SolrClient solrClient = clients.get(random().nextInt(clients.size()));
     add(solrClient, null, docs);
diff --git a/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java b/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
index 24409fe114b..08bc1a2ef8d 100644
--- a/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
+++ b/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
@@ -31,6 +31,7 @@ import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
@@ -569,23 +570,49 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
     indexDoc(doc);
   }
 
+  private SolrClient clientFor(SolrInputDocument doc) {
+    return clients.get((doc.getField(id).toString().hashCode() & 0x7fffffff) % clients.size());
+  }
+
   /** Indexes the document in both the control client, and a randomly selected client */
   protected void indexDoc(SolrInputDocument doc) throws IOException, SolrServerException {
+    indexDoc(clientFor(doc), null, doc);
+  }
+
+  protected void indexDoc(SolrClient client, SolrParams params, SolrInputDocument doc)
+      throws IOException, SolrServerException {
     controlClient.add(doc);
     if (shardCount == 0) { // mostly for temp debugging
       return;
     }
-    int which = (doc.getField(id).toString().hashCode() & 0x7fffffff) % clients.size();
-    SolrClient client = clients.get(which);
     client.add(doc);
   }
 
+  /**
+   * Indexes the stream of documents in both the control client and randomly selected clients (per
+   * batch)
+   */
+  public void indexDocs(Iterator<SolrInputDocument> docs) throws SolrServerException, IOException {
+    int batchSize = 100; // TODO make configurable?
+    List<SolrInputDocument> batch = new ArrayList<>(batchSize);
+    while (docs.hasNext()) {
+      batch.add(docs.next());
+      if (batch.size() == batchSize) {
+        indexDocs(clientFor(batch.get(0)), null, batch);
+        batch.clear();
+      }
+    }
+    if (!batch.isEmpty()) {
+      indexDocs(clientFor(batch.get(0)), null, batch);
+    }
+  }
+
   /**
    * Indexes the document in both the control client and the specified client asserting that the
    * responses are equivalent
    */
-  protected UpdateResponse indexDoc(
-      SolrClient client, SolrParams params, SolrInputDocument... sdocs)
+  protected UpdateResponse indexDocs(
+      SolrClient client, SolrParams params, Iterable<SolrInputDocument> sdocs)
       throws IOException, SolrServerException {
     UpdateResponse controlRsp = add(controlClient, params, sdocs);
     UpdateResponse specificRsp = add(client, params, sdocs);
@@ -593,7 +620,19 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
     return specificRsp;
   }
 
-  protected UpdateResponse add(SolrClient client, SolrParams params, SolrInputDocument... sdocs)
+  /**
+   * This should not be called in a loop, use {@link #add(SolrClient, SolrParams, Iterable)} instead
+   */
+  protected UpdateResponse add(SolrClient client, SolrParams params, SolrInputDocument sdoc)
+      throws IOException, SolrServerException {
+    UpdateRequest ureq = new UpdateRequest();
+    ureq.setParams(new ModifiableSolrParams(params));
+    ureq.add(sdoc);
+    return ureq.process(client);
+  }
+
+  protected UpdateResponse add(
+      SolrClient client, SolrParams params, Iterable<SolrInputDocument> sdocs)
       throws IOException, SolrServerException {
     UpdateRequest ureq = new UpdateRequest();
     ureq.setParams(new ModifiableSolrParams(params));
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractBasicDistributedZkTestBase.java b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractBasicDistributedZkTestBase.java
index 3f517ba79ac..281cb9a888c 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractBasicDistributedZkTestBase.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractBasicDistributedZkTestBase.java
@@ -710,7 +710,7 @@ public abstract class AbstractBasicDistributedZkTestBase extends AbstractFullDis
 
     ModifiableSolrParams params = new ModifiableSolrParams();
     params.set("commitWithin", 10);
-    add(cloudClient, params, getDoc("id", 300), getDoc("id", 301));
+    add(cloudClient, params, List.of(getDoc("id", 300), getDoc("id", 301)));
 
     newSearcherHook.waitForSearcher(DEFAULT_COLLECTION, 2, 20000, false);
 
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
index b138b902c67..b607e063da9 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
@@ -2226,9 +2226,7 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
   }
 
   public static SolrInputDocument getDoc(Object... fields) throws Exception {
-    SolrInputDocument doc = new SolrInputDocument();
-    addFields(doc, fields);
-    return doc;
+    return sdoc(fields);
   }
 
   private String checkCollectionExpectations(