You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by md...@apache.org on 2022/04/04 17:30:50 UTC
[solr] 01/03: SOLR-16097 Batch add docs when starting tests (#744)
This is an automated email from the ASF dual-hosted git repository.
mdrob pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git
commit 912d9157271fdec2b22261ed086eaeea1b4079f5
Author: Mike Drob <md...@apache.org>
AuthorDate: Wed Mar 30 09:40:59 2022 -0500
SOLR-16097 Batch add docs when starting tests (#744)
Co-authored-by: Christine Poerschke <cp...@apache.org>
---
.../apache/solr/cloud/DistribCursorPagingTest.java | 21 ++++-----
.../component/DistributedFacetPivotLargeTest.java | 4 +-
.../TestDistributedStatsComponentCardinality.java | 51 +++++++++++++---------
.../test/org/apache/solr/update/PeerSyncTest.java | 32 ++++++--------
.../solr/update/TestInPlaceUpdatesDistrib.java | 2 +-
.../apache/solr/BaseDistributedSearchTestCase.java | 49 ++++++++++++++++++---
.../cloud/AbstractBasicDistributedZkTestBase.java | 2 +-
.../solr/cloud/AbstractFullDistribZkTestBase.java | 4 +-
8 files changed, 102 insertions(+), 63 deletions(-)
diff --git a/solr/core/src/test/org/apache/solr/cloud/DistribCursorPagingTest.java b/solr/core/src/test/org/apache/solr/cloud/DistribCursorPagingTest.java
index 47ec9c34459..b26644009f6 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DistribCursorPagingTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DistribCursorPagingTest.java
@@ -28,6 +28,7 @@ import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
+import java.util.stream.IntStream;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.lucene.util.SentinelIntSet;
@@ -41,7 +42,6 @@ import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.CursorMarkParams;
import org.apache.solr.common.params.GroupParams;
@@ -581,13 +581,10 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
// start with a smallish number of documents, and test that we can do a full walk using a
// sort on *every* field in the schema...
-
- List<SolrInputDocument> initialDocs = new ArrayList<>();
- for (int i = 1; i <= numInitialDocs; i++) {
- SolrInputDocument doc = CursorPagingTest.buildRandomDocument(i);
- initialDocs.add(doc);
- indexDoc(doc);
- }
+ indexDocs(
+ IntStream.rangeClosed(1, numInitialDocs)
+ .mapToObj(CursorPagingTest::buildRandomDocument)
+ .iterator());
commit();
for (String f : allFieldNames) {
@@ -618,10 +615,10 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
}
// now add a lot more docs, and test a handful of randomized multi-level sorts
- for (int i = numInitialDocs + 1; i <= totalDocs; i++) {
- SolrInputDocument doc = CursorPagingTest.buildRandomDocument(i);
- indexDoc(doc);
- }
+ indexDocs(
+ IntStream.rangeClosed(numInitialDocs + 1, totalDocs)
+ .mapToObj(CursorPagingTest::buildRandomDocument)
+ .iterator());
commit();
final int numRandomSorts = atLeast(3);
diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java
index ad2df43b775..e3110ec1951 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java
@@ -1574,13 +1574,13 @@ public class DistributedFacetPivotLargeTest extends BaseDistributedSearchTestCas
* Builds up a SolrInputDocument using the specified fields, then adds it to the specified client
* as well as the control client
*
- * @see #indexDoc(org.apache.solr.client.solrj.SolrClient,SolrParams,SolrInputDocument...)
+ * @see #indexDoc(SolrClient, SolrParams, SolrInputDocument)
* @see #sdoc
*/
private void addPivotDoc(SolrClient client, Object... fields)
throws IOException, SolrServerException {
- indexDoc(client, params(), sdoc(fields));
+ indexDocs(client, null, List.of(sdoc(fields)));
}
private int docNumber = 0;
diff --git a/solr/core/src/test/org/apache/solr/handler/component/TestDistributedStatsComponentCardinality.java b/solr/core/src/test/org/apache/solr/handler/component/TestDistributedStatsComponentCardinality.java
index 4262346b20b..b41cfc02633 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/TestDistributedStatsComponentCardinality.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/TestDistributedStatsComponentCardinality.java
@@ -22,14 +22,18 @@ import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.stream.IntStream;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
import org.apache.solr.client.solrj.response.FieldStatsInfo;
import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.util.LogLevel;
@@ -77,27 +81,32 @@ public class TestDistributedStatsComponentCardinality extends BaseDistributedSea
log.info("Building an index of {} docs", NUM_DOCS);
// we want a big spread in the long values we use, decrement by BIG_PRIME as we index
- long longValue = MAX_LONG;
-
- for (int i = 1; i <= NUM_DOCS; i++) {
- // with these values, we know that every doc indexed has a unique value in all of the
- // fields we will compute cardinality against.
- // which means the number of docs matching a query is the true cardinality for each field
-
- final String strValue = "s" + longValue;
- indexDoc(
- sdoc(
- "id", "" + i,
- "int_i", "" + i,
- "int_i_prehashed_l", "" + HASHER.hashInt(i).asLong(),
- "long_l", "" + longValue,
- "long_l_prehashed_l", "" + HASHER.hashLong(longValue).asLong(),
- "string_s", strValue,
- "string_s_prehashed_l",
- "" + HASHER.hashString(strValue, StandardCharsets.UTF_8).asLong()));
-
- longValue -= BIG_PRIME;
- }
+ final AtomicLong longValue = new AtomicLong(MAX_LONG);
+
+ Iterator<SolrInputDocument> docs =
+ IntStream.rangeClosed(1, NUM_DOCS)
+ .mapToObj(
+ i -> {
+ long currentLong = longValue.getAndAccumulate(BIG_PRIME, (x, y) -> x - y);
+
+ final String strValue = "s" + currentLong;
+
+ // with these values, we know that every doc indexed has a unique value in all of
+ // the fields we will compute cardinality against. which means the number of docs
+ // matching a query is the true cardinality for each field
+ return sdoc(
+ "id", Integer.toString(i),
+ "int_i", Integer.toString(i),
+ "int_i_prehashed_l", Long.toString(HASHER.hashInt(i).asLong()),
+ "long_l", Long.toString(currentLong),
+ "long_l_prehashed_l", Long.toString(HASHER.hashLong(currentLong).asLong()),
+ "string_s", strValue,
+ "string_s_prehashed_l",
+ Long.toString(
+ HASHER.hashString(strValue, StandardCharsets.UTF_8).asLong()));
+ })
+ .iterator();
+ indexDocs(docs);
commit();
}
diff --git a/solr/core/src/test/org/apache/solr/update/PeerSyncTest.java b/solr/core/src/test/org/apache/solr/update/PeerSyncTest.java
index 3e5e03faada..ba3a7ba58bc 100644
--- a/solr/core/src/test/org/apache/solr/update/PeerSyncTest.java
+++ b/solr/core/src/test/org/apache/solr/update/PeerSyncTest.java
@@ -20,6 +20,7 @@ import static org.apache.solr.update.processor.DistributingUpdateProcessorFactor
import static org.hamcrest.core.StringContains.containsString;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashSet;
@@ -87,7 +88,7 @@ public class PeerSyncTest extends BaseDistributedSearchTestCase {
SolrClient client1 = clients.get(1);
SolrClient client2 = clients.get(2);
- long v = 0;
+ int v = 0;
add(client0, seenLeader, sdoc("id", "1", "_version_", ++v));
// this fails because client0 has no context (i.e. no updates of its own to judge if applying
@@ -213,26 +214,23 @@ public class PeerSyncTest extends BaseDistributedSearchTestCase {
// now lets check fingerprinting causes appropriate fails
v = 4000;
- add(client0, seenLeader, sdoc("id", Integer.toString((int) v), "_version_", v));
+ add(client0, seenLeader, sdoc("id", Integer.toString(v), "_version_", v));
docsAdded.add(4000);
int toAdd = numVersions + 10;
- for (int i = 0; i < toAdd; i++) {
- add(
- client0,
- seenLeader,
- sdoc("id", Integer.toString((int) v + i + 1), "_version_", v + i + 1));
- add(
- client1,
- seenLeader,
- sdoc("id", Integer.toString((int) v + i + 1), "_version_", v + i + 1));
- docsAdded.add((int) v + i + 1);
+
+ List<SolrInputDocument> additionalDocs = new ArrayList<>(toAdd);
+ for (int i = v + 1; i < v + toAdd + 1; i++) {
+ additionalDocs.add(sdoc("id", Integer.toString(i), "_version_", i));
+ docsAdded.add(i);
}
+ add(client0, seenLeader, additionalDocs);
+ add(client1, seenLeader, additionalDocs);
// client0 now has an additional add beyond our window and the fingerprint should cause this to
// fail
assertSync(client1, numVersions, false, shardsArr[0]);
- // if we turn of fingerprinting, it should succeed
+ // if we turn off fingerprinting, it should succeed
System.setProperty("solr.disableFingerprint", "true");
try {
assertSync(client1, numVersions, true, shardsArr[0]);
@@ -241,15 +239,13 @@ public class PeerSyncTest extends BaseDistributedSearchTestCase {
}
// lets add the missing document and verify that order doesn't matter
- add(client1, seenLeader, sdoc("id", Integer.toString((int) v), "_version_", v));
+ add(client1, seenLeader, sdoc("id", Integer.toString(v), "_version_", v));
assertSync(client1, numVersions, true, shardsArr[0]);
// lets do some overwrites to ensure that repeated updates and maxDoc don't matter
for (int i = 0; i < 10; i++) {
- add(
- client0,
- seenLeader,
- sdoc("id", Integer.toString((int) v + i + 1), "_version_", v + i + 1));
+ // add individually instead of in batch to create more writes
+ add(client0, seenLeader, sdoc("id", Integer.toString(v + i + 1), "_version_", v + i + 1));
}
assertSync(client1, numVersions, true, shardsArr[0]);
diff --git a/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdatesDistrib.java b/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdatesDistrib.java
index 4dd45e6eaa6..e7b3408cad6 100644
--- a/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdatesDistrib.java
+++ b/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdatesDistrib.java
@@ -614,7 +614,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
nonexistentDocUpdate.setField("id", 2);
nonexistentDocUpdate.setField("inplace_updatable_float", map("set", "50"));
- SolrInputDocument docs[] = new SolrInputDocument[] {existingDocUpdate, nonexistentDocUpdate};
+ List<SolrInputDocument> docs = List.of(existingDocUpdate, nonexistentDocUpdate);
SolrClient solrClient = clients.get(random().nextInt(clients.size()));
add(solrClient, null, docs);
diff --git a/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java b/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
index 24409fe114b..08bc1a2ef8d 100644
--- a/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
+++ b/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
@@ -31,6 +31,7 @@ import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@@ -569,23 +570,49 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
indexDoc(doc);
}
+ private SolrClient clientFor(SolrInputDocument doc) {
+ return clients.get((doc.getField(id).toString().hashCode() & 0x7fffffff) % clients.size());
+ }
+
/** Indexes the document in both the control client, and a randomly selected client */
protected void indexDoc(SolrInputDocument doc) throws IOException, SolrServerException {
+ indexDoc(clientFor(doc), null, doc);
+ }
+
+ protected void indexDoc(SolrClient client, SolrParams params, SolrInputDocument doc)
+ throws IOException, SolrServerException {
controlClient.add(doc);
if (shardCount == 0) { // mostly for temp debugging
return;
}
- int which = (doc.getField(id).toString().hashCode() & 0x7fffffff) % clients.size();
- SolrClient client = clients.get(which);
client.add(doc);
}
+ /**
+ * Indexes the stream of documents in both the control client and randomly selected clients (per
+ * batch)
+ */
+ public void indexDocs(Iterator<SolrInputDocument> docs) throws SolrServerException, IOException {
+ int batchSize = 100; // TODO make configurable?
+ List<SolrInputDocument> batch = new ArrayList<>(batchSize);
+ while (docs.hasNext()) {
+ batch.add(docs.next());
+ if (batch.size() == batchSize) {
+ indexDocs(clientFor(batch.get(0)), null, batch);
+ batch.clear();
+ }
+ }
+ if (!batch.isEmpty()) {
+ indexDocs(clientFor(batch.get(0)), null, batch);
+ }
+ }
+
/**
* Indexes the document in both the control client and the specified client asserting that the
* responses are equivalent
*/
- protected UpdateResponse indexDoc(
- SolrClient client, SolrParams params, SolrInputDocument... sdocs)
+ protected UpdateResponse indexDocs(
+ SolrClient client, SolrParams params, Iterable<SolrInputDocument> sdocs)
throws IOException, SolrServerException {
UpdateResponse controlRsp = add(controlClient, params, sdocs);
UpdateResponse specificRsp = add(client, params, sdocs);
@@ -593,7 +620,19 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
return specificRsp;
}
- protected UpdateResponse add(SolrClient client, SolrParams params, SolrInputDocument... sdocs)
+ /**
+ * This should not be called in a loop, use {@link #add(SolrClient, SolrParams, Iterable)} instead
+ */
+ protected UpdateResponse add(SolrClient client, SolrParams params, SolrInputDocument sdoc)
+ throws IOException, SolrServerException {
+ UpdateRequest ureq = new UpdateRequest();
+ ureq.setParams(new ModifiableSolrParams(params));
+ ureq.add(sdoc);
+ return ureq.process(client);
+ }
+
+ protected UpdateResponse add(
+ SolrClient client, SolrParams params, Iterable<SolrInputDocument> sdocs)
throws IOException, SolrServerException {
UpdateRequest ureq = new UpdateRequest();
ureq.setParams(new ModifiableSolrParams(params));
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractBasicDistributedZkTestBase.java b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractBasicDistributedZkTestBase.java
index 3f517ba79ac..281cb9a888c 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractBasicDistributedZkTestBase.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractBasicDistributedZkTestBase.java
@@ -710,7 +710,7 @@ public abstract class AbstractBasicDistributedZkTestBase extends AbstractFullDis
ModifiableSolrParams params = new ModifiableSolrParams();
params.set("commitWithin", 10);
- add(cloudClient, params, getDoc("id", 300), getDoc("id", 301));
+ add(cloudClient, params, List.of(getDoc("id", 300), getDoc("id", 301)));
newSearcherHook.waitForSearcher(DEFAULT_COLLECTION, 2, 20000, false);
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
index b138b902c67..b607e063da9 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
@@ -2226,9 +2226,7 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
}
public static SolrInputDocument getDoc(Object... fields) throws Exception {
- SolrInputDocument doc = new SolrInputDocument();
- addFields(doc, fields);
- return doc;
+ return sdoc(fields);
}
private String checkCollectionExpectations(