You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2022/11/17 13:44:39 UTC

[lucene] branch branch_9x updated: Decrease test time for TestManyKnnDocs.testLargeSegment (#11945)

This is an automated email from the ASF dual-hosted git repository.

rmuir pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 3973209b20a Decrease test time for TestManyKnnDocs.testLargeSegment (#11945)
3973209b20a is described below

commit 3973209b20a8552ecd3926a0588818b2ea694b7b
Author: Jack Conradson <os...@gmail.com>
AuthorDate: Wed Nov 16 20:52:32 2022 -0800

    Decrease test time for TestManyKnnDocs.testLargeSegment (#11945)
    
    * Improve speed of TestManyKnnDocs
---
 .../apache/lucene/document/TestManyKnnDocs.java    |  9 +++--
 lucene/test-framework/src/java/module-info.java    |  4 +-
 .../tests/codecs/vector/ConfigurableMCodec.java    | 47 ++++++++++++++++++++++
 .../lucene/tests/codecs/vector/package-info.java   | 19 +++++++++
 .../services/org.apache.lucene.codecs.Codec        |  1 +
 5 files changed, 75 insertions(+), 5 deletions(-)

diff --git a/lucene/core/src/test/org/apache/lucene/document/TestManyKnnDocs.java b/lucene/core/src/test/org/apache/lucene/document/TestManyKnnDocs.java
index bc3a249ffa0..5eb419fef8f 100644
--- a/lucene/core/src/test/org/apache/lucene/document/TestManyKnnDocs.java
+++ b/lucene/core/src/test/org/apache/lucene/document/TestManyKnnDocs.java
@@ -23,19 +23,20 @@ import org.apache.lucene.search.KnnVectorQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.tests.codecs.vector.ConfigurableMCodec;
 import org.apache.lucene.tests.util.LuceneTestCase;
 import org.apache.lucene.tests.util.LuceneTestCase.Monster;
-import org.apache.lucene.tests.util.TestUtil;
 
 @TimeoutSuite(millis = 86_400_000) // 24 hour timeout
-@Monster("takes ~2 hours and needs extra heap, disk space, file handles")
+@Monster("takes ~10 minutes and needs extra heap, disk space, file handles")
 public class TestManyKnnDocs extends LuceneTestCase {
   // gradlew -p lucene/core test --tests TestManyKnnDocs -Ptests.heapsize=16g -Dtests.monster=true
 
   public void testLargeSegment() throws Exception {
     IndexWriterConfig iwc = new IndexWriterConfig();
     iwc.setCodec(
-        TestUtil.getDefaultCodec()); // Make sure to use the default codec instead of a random one
+        new ConfigurableMCodec(
+            128)); // Make sure to use the ConfigurableMCodec instead of a random one
     iwc.setRAMBufferSizeMB(64); // Use a 64MB buffer to create larger initial segments
     TieredMergePolicy mp = new TieredMergePolicy();
     mp.setMaxMergeAtOnce(256); // avoid intermediate merges (waste of time with HNSW?)
@@ -47,7 +48,7 @@ public class TestManyKnnDocs extends LuceneTestCase {
     try (Directory dir = FSDirectory.open(createTempDir("ManyKnnVectorDocs"));
         IndexWriter iw = new IndexWriter(dir, iwc)) {
 
-      int numVectors = 16268816;
+      int numVectors = 2088992;
       float[] vector = new float[1];
       Document doc = new Document();
       doc.add(new KnnVectorField(fieldName, vector, similarityFunction));
diff --git a/lucene/test-framework/src/java/module-info.java b/lucene/test-framework/src/java/module-info.java
index 893d57189bb..f366d1f52b7 100644
--- a/lucene/test-framework/src/java/module-info.java
+++ b/lucene/test-framework/src/java/module-info.java
@@ -40,6 +40,7 @@ module org.apache.lucene.test_framework {
   exports org.apache.lucene.tests.codecs.ramonly;
   exports org.apache.lucene.tests.codecs.uniformsplit.sharedterms;
   exports org.apache.lucene.tests.codecs.uniformsplit;
+  exports org.apache.lucene.tests.codecs.vector;
   exports org.apache.lucene.tests.geo;
   exports org.apache.lucene.tests.index;
   exports org.apache.lucene.tests.mockfile;
@@ -58,7 +59,8 @@ module org.apache.lucene.test_framework {
       org.apache.lucene.tests.codecs.compressing.FastDecompressionCompressingCodec,
       org.apache.lucene.tests.codecs.compressing.HighCompressionCompressingCodec,
       org.apache.lucene.tests.codecs.compressing.LZ4WithPresetCompressingCodec,
-      org.apache.lucene.tests.codecs.compressing.dummy.DummyCompressingCodec;
+      org.apache.lucene.tests.codecs.compressing.dummy.DummyCompressingCodec,
+      org.apache.lucene.tests.codecs.vector.ConfigurableMCodec;
   provides org.apache.lucene.codecs.DocValuesFormat with
       org.apache.lucene.tests.codecs.asserting.AssertingDocValuesFormat;
   provides org.apache.lucene.codecs.KnnVectorsFormat with
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/ConfigurableMCodec.java b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/ConfigurableMCodec.java
new file mode 100644
index 00000000000..89d0e6a970d
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/ConfigurableMCodec.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.tests.codecs.vector;
+
+import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.lucene94.Lucene94Codec;
+import org.apache.lucene.codecs.lucene94.Lucene94HnswVectorsFormat;
+
+/**
+ * Test codec wrapping {@link Lucene94Codec} with a configurable number of hnsw connections.
+ * Increasing the number of connections can decrease the time of certain tests while still
+ * achieving the same test coverage.
+ */
+public class ConfigurableMCodec extends FilterCodec {
+
+  private final KnnVectorsFormat knnVectorsFormat; // built once in the constructor, never reassigned
+
+  public ConfigurableMCodec() { // no-arg form: same settings as ConfigurableMCodec(128)
+    super("ConfigurableMCodec", new Lucene94Codec());
+    knnVectorsFormat = new Lucene94HnswVectorsFormat(128, 100);
+  }
+
+  public ConfigurableMCodec(int maxConn) { // maxConn: first argument of the hnsw vectors format
+    super("ConfigurableMCodec", new Lucene94Codec());
+    knnVectorsFormat = new Lucene94HnswVectorsFormat(maxConn, 100); // 100 hard-coded in both ctors
+  }
+
+  @Override
+  public KnnVectorsFormat knnVectorsFormat() { // overrides the inherited knnVectorsFormat()
+    return knnVectorsFormat;
+  }
+}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/package-info.java b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/package-info.java
new file mode 100644
index 00000000000..6c1ab143cc9
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/package-info.java
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Codecs for testing different vector formats. */
+package org.apache.lucene.tests.codecs.vector;
diff --git a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
index cdc3aeca2a9..7b75d2bd4d3 100644
--- a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
+++ b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
@@ -21,3 +21,4 @@ org.apache.lucene.tests.codecs.compressing.FastDecompressionCompressingCodec
 org.apache.lucene.tests.codecs.compressing.HighCompressionCompressingCodec
 org.apache.lucene.tests.codecs.compressing.LZ4WithPresetCompressingCodec
 org.apache.lucene.tests.codecs.compressing.dummy.DummyCompressingCodec
+org.apache.lucene.tests.codecs.vector.ConfigurableMCodec