You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2022/11/17 13:44:39 UTC
[lucene] branch branch_9x updated: Decrease test time for TestManyKnnDocs.testLargeSegment (#11945)
This is an automated email from the ASF dual-hosted git repository.
rmuir pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 3973209b20a Decrease test time for TestManyKnnDocs.testLargeSegment (#11945)
3973209b20a is described below
commit 3973209b20a8552ecd3926a0588818b2ea694b7b
Author: Jack Conradson <os...@gmail.com>
AuthorDate: Wed Nov 16 20:52:32 2022 -0800
Decrease test time for TestManyKnnDocs.testLargeSegment (#11945)
* Improve speed of TestManyKnnDocs
---
.../apache/lucene/document/TestManyKnnDocs.java | 9 +++--
lucene/test-framework/src/java/module-info.java | 4 +-
.../tests/codecs/vector/ConfigurableMCodec.java | 47 ++++++++++++++++++++++
.../lucene/tests/codecs/vector/package-info.java | 19 +++++++++
.../services/org.apache.lucene.codecs.Codec | 1 +
5 files changed, 75 insertions(+), 5 deletions(-)
diff --git a/lucene/core/src/test/org/apache/lucene/document/TestManyKnnDocs.java b/lucene/core/src/test/org/apache/lucene/document/TestManyKnnDocs.java
index bc3a249ffa0..5eb419fef8f 100644
--- a/lucene/core/src/test/org/apache/lucene/document/TestManyKnnDocs.java
+++ b/lucene/core/src/test/org/apache/lucene/document/TestManyKnnDocs.java
@@ -23,19 +23,20 @@ import org.apache.lucene.search.KnnVectorQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.tests.codecs.vector.ConfigurableMCodec;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.LuceneTestCase.Monster;
-import org.apache.lucene.tests.util.TestUtil;
@TimeoutSuite(millis = 86_400_000) // 24 hour timeout
-@Monster("takes ~2 hours and needs extra heap, disk space, file handles")
+@Monster("takes ~10 minutes and needs extra heap, disk space, file handles")
public class TestManyKnnDocs extends LuceneTestCase {
// gradlew -p lucene/core test --tests TestManyKnnDocs -Ptests.heapsize=16g -Dtests.monster=true
public void testLargeSegment() throws Exception {
IndexWriterConfig iwc = new IndexWriterConfig();
iwc.setCodec(
- TestUtil.getDefaultCodec()); // Make sure to use the default codec instead of a random one
+ new ConfigurableMCodec(
+ 128)); // Make sure to use the ConfigurableMCodec instead of a random one
iwc.setRAMBufferSizeMB(64); // Use a 64MB buffer to create larger initial segments
TieredMergePolicy mp = new TieredMergePolicy();
mp.setMaxMergeAtOnce(256); // avoid intermediate merges (waste of time with HNSW?)
@@ -47,7 +48,7 @@ public class TestManyKnnDocs extends LuceneTestCase {
try (Directory dir = FSDirectory.open(createTempDir("ManyKnnVectorDocs"));
IndexWriter iw = new IndexWriter(dir, iwc)) {
- int numVectors = 16268816;
+ int numVectors = 2088992;
float[] vector = new float[1];
Document doc = new Document();
doc.add(new KnnVectorField(fieldName, vector, similarityFunction));
diff --git a/lucene/test-framework/src/java/module-info.java b/lucene/test-framework/src/java/module-info.java
index 893d57189bb..f366d1f52b7 100644
--- a/lucene/test-framework/src/java/module-info.java
+++ b/lucene/test-framework/src/java/module-info.java
@@ -40,6 +40,7 @@ module org.apache.lucene.test_framework {
exports org.apache.lucene.tests.codecs.ramonly;
exports org.apache.lucene.tests.codecs.uniformsplit.sharedterms;
exports org.apache.lucene.tests.codecs.uniformsplit;
+ exports org.apache.lucene.tests.codecs.vector;
exports org.apache.lucene.tests.geo;
exports org.apache.lucene.tests.index;
exports org.apache.lucene.tests.mockfile;
@@ -58,7 +59,8 @@ module org.apache.lucene.test_framework {
org.apache.lucene.tests.codecs.compressing.FastDecompressionCompressingCodec,
org.apache.lucene.tests.codecs.compressing.HighCompressionCompressingCodec,
org.apache.lucene.tests.codecs.compressing.LZ4WithPresetCompressingCodec,
- org.apache.lucene.tests.codecs.compressing.dummy.DummyCompressingCodec;
+ org.apache.lucene.tests.codecs.compressing.dummy.DummyCompressingCodec,
+ org.apache.lucene.tests.codecs.vector.ConfigurableMCodec;
provides org.apache.lucene.codecs.DocValuesFormat with
org.apache.lucene.tests.codecs.asserting.AssertingDocValuesFormat;
provides org.apache.lucene.codecs.KnnVectorsFormat with
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/ConfigurableMCodec.java b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/ConfigurableMCodec.java
new file mode 100644
index 00000000000..89d0e6a970d
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/ConfigurableMCodec.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.tests.codecs.vector;
+
+import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.lucene94.Lucene94Codec;
+import org.apache.lucene.codecs.lucene94.Lucene94HnswVectorsFormat;
+
+/**
+ * Test codec whose HNSW vectors format uses a configurable number of connections ({@code
+ * maxConn}). A larger value can shorten certain tests while keeping the same test coverage.
+ */
+public class ConfigurableMCodec extends FilterCodec {
+
+  private final KnnVectorsFormat knnVectorsFormat;
+
+  /** No-arg constructor required for the Codec service registration; uses maxConn = 128. */
+  public ConfigurableMCodec() {
+    this(128);
+  }
+
+  /** Wraps {@link Lucene94Codec}, building the HNSW format with {@code maxConn} connections. */
+  public ConfigurableMCodec(int maxConn) {
+    super("ConfigurableMCodec", new Lucene94Codec());
+    knnVectorsFormat = new Lucene94HnswVectorsFormat(maxConn, 100);
+  }
+
+  @Override
+  public KnnVectorsFormat knnVectorsFormat() {
+    return knnVectorsFormat;
+  }
+}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/package-info.java b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/package-info.java
new file mode 100644
index 00000000000..6c1ab143cc9
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/package-info.java
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Codecs for testing different vector formats. */
+package org.apache.lucene.tests.codecs.vector;
diff --git a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
index cdc3aeca2a9..7b75d2bd4d3 100644
--- a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
+++ b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
@@ -21,3 +21,4 @@ org.apache.lucene.tests.codecs.compressing.FastDecompressionCompressingCodec
org.apache.lucene.tests.codecs.compressing.HighCompressionCompressingCodec
org.apache.lucene.tests.codecs.compressing.LZ4WithPresetCompressingCodec
org.apache.lucene.tests.codecs.compressing.dummy.DummyCompressingCodec
+org.apache.lucene.tests.codecs.vector.ConfigurableMCodec