You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2020/02/05 21:34:59 UTC

[lucene-solr] branch master updated: LUCENE-9206: Improve IndexMergeTool defaults and options

This is an automated email from the ASF dual-hosted git repository.

rmuir pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 93b83f6  LUCENE-9206: Improve IndexMergeTool defaults and options
93b83f6 is described below

commit 93b83f635dffc782dc70174e5ea377ceab6a8174
Author: Robert Muir <rm...@apache.org>
AuthorDate: Wed Feb 5 16:31:07 2020 -0500

    LUCENE-9206: Improve IndexMergeTool defaults and options
    
    IndexMergeTool previously had no options and always forceMerge(1)
    the resulting index. This can result in wasted work and confusing
    performance (unbalancing the index).
    
    Instead the default is to not do anything, except merges from the
    merge policy.
---
 lucene/CHANGES.txt                                 |  4 +
 lucene/MIGRATE.txt                                 |  6 ++
 .../org/apache/lucene/misc/IndexMergeTool.java     | 89 ++++++++++++++++++----
 .../org/apache/lucene/misc/TestIndexMergeTool.java | 65 ++++++++++++++++
 4 files changed, 150 insertions(+), 14 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 0d6b772..269e3dd 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -68,6 +68,10 @@ Improvements
 * LUCENE-9110: Refactor stack analysis in tests to use generalized LuceneTestCase
   methods that use StackWalker (Uwe Schindler)
 
+* LUCENE-9206: IndexMergeTool gets additional options to control the merging.
+  This tool no longer forceMerge(1)s to a single segment by default. If you
+  rely upon this behavior, pass -max-segments 1 instead. (Robert Muir)
+
 Bug fixes
 
 * LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while 
diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt
index b7c17e2..c019260 100644
--- a/lucene/MIGRATE.txt
+++ b/lucene/MIGRATE.txt
@@ -1,5 +1,11 @@
 # Apache Lucene Migration Guide
 
+## o.a.l.misc.IndexMergeTool defaults changes (LUCENE-9206) ##
+
+This command-line tool no longer forceMerges to a single segment. Instead, by
+default it just follows (configurable) merge policy. If you really want to merge
+to a single segment, you can pass -max-segments 1.
+
 ## o.a.l.util.fst.Builder is renamed FSTCompiler with fluent-style Builder (LUCENE-9089) ##
 
 Simply use FSTCompiler instead of the previous Builder. Use either the simple constructor with default settings, or
diff --git a/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java b/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java
index 09eb850..1342a55 100644
--- a/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java
@@ -19,12 +19,12 @@ package org.apache.lucene.misc;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.MergePolicy;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.HardlinkCopyDirectoryWrapper;
 import org.apache.lucene.util.SuppressForbidden;
 
-import java.io.IOException;
 import java.nio.file.Paths;
 
 /**
@@ -34,28 +34,89 @@ import java.nio.file.Paths;
 @SuppressForbidden(reason = "System.out required: command line tool")
 public class IndexMergeTool {
   
-  public static void main(String[] args) throws IOException {
-    if (args.length < 3) {
-      System.err.println("Usage: IndexMergeTool <mergedIndex> <index1> <index2> [index3] ...");
-      System.exit(1);
+  static final String USAGE =
+    "Usage: IndexMergeTool [OPTION...] <mergedIndex> <index1> <index2> [index3] ...\n" +
+    "Merges source indexes 'index1' .. 'indexN' into 'mergedIndex'\n" +
+    "\n" +
+    "OPTIONS:\n" +
+    " -merge-policy ClassName  specifies MergePolicy class (must be in CLASSPATH).The default is\n" +
+    "                          'org.apache.lucene.index.TieredMergePolicy.TieredMergePolicy'\n" +
+    " -max-segments N          force-merge's the index to a maximum of N segments. Default is\n" +
+    "                          to execute only the merges according to the merge policy.\n" +
+    " -verbose                 print additional details.\n";
+
+  static class Options {
+    String mergedIndexPath;
+    String indexPaths[];
+    IndexWriterConfig config = new IndexWriterConfig(null).setOpenMode(OpenMode.CREATE);
+    int maxSegments = 0;
+
+    static Options parse(String args[]) throws ReflectiveOperationException {
+      Options options = new Options();
+      int index = 0;
+      while (index < args.length) {
+        if (!args[index].startsWith("-")) {
+          break;
+        }
+        if (args[index] == "--") {
+          break;
+        }
+        switch(args[index]) {
+          case "-merge-policy":
+            String clazzName = args[++index];
+            Class<? extends MergePolicy> clazz = Class.forName(clazzName).asSubclass(MergePolicy.class);
+            options.config.setMergePolicy(clazz.getConstructor().newInstance());
+            break;
+          case "-max-segments":
+            options.maxSegments = Integer.parseInt(args[++index]);
+            break;
+          case "-verbose":
+            options.config.setInfoStream(System.err);
+            break;
+          default: throw new IllegalArgumentException("unrecognized option: '" + args[index] + "'\n" + USAGE);
+        }
+        index++;
+      }
+
+      // process any remaining arguments as the target and source index paths.
+      int numPaths = args.length - index;
+      if (numPaths < 3) {
+        throw new IllegalArgumentException("not enough parameters.\n" + USAGE);
+      }
+
+      options.mergedIndexPath = args[index];
+      options.indexPaths = new String[numPaths - 1];
+      System.arraycopy(args, index + 1, options.indexPaths, 0, options.indexPaths.length);
+      return options;
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    Options options = null;
+    try {
+      options = Options.parse(args);
+    } catch (IllegalArgumentException e) {
+      System.err.println(e.getMessage());
+      System.exit(2);
     }
 
     // Try to use hardlinks to source segments, if possible.
-    Directory mergedIndex = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get(args[0])));
-
-    IndexWriter writer = new IndexWriter(mergedIndex, 
-        new IndexWriterConfig(null).setOpenMode(OpenMode.CREATE));
+    Directory mergedIndex = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get(options.mergedIndexPath)));
 
-    Directory[] indexes = new Directory[args.length - 1];
-    for (int i = 1; i < args.length; i++) {
-      indexes[i  - 1] = FSDirectory.open(Paths.get(args[i]));
+    Directory[] indexes = new Directory[options.indexPaths.length];
+    for (int i = 0; i < indexes.length; i++) {
+      indexes[i] = FSDirectory.open(Paths.get(options.indexPaths[i]));
     }
 
+    IndexWriter writer = new IndexWriter(mergedIndex, options.config);
+
     System.out.println("Merging...");
     writer.addIndexes(indexes);
 
-    System.out.println("Full merge...");
-    writer.forceMerge(1);
+    if (options.maxSegments > 0) {
+      System.out.println("Force-merging to " + options.maxSegments + "...");
+      writer.forceMerge(options.maxSegments);
+    }
     writer.close();
     System.out.println("Done.");
   }
diff --git a/lucene/misc/src/test/org/apache/lucene/misc/TestIndexMergeTool.java b/lucene/misc/src/test/org/apache/lucene/misc/TestIndexMergeTool.java
new file mode 100644
index 0000000..b9183ea
--- /dev/null
+++ b/lucene/misc/src/test/org/apache/lucene/misc/TestIndexMergeTool.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.misc;
+
+import org.apache.lucene.index.LogDocMergePolicy;
+import org.apache.lucene.misc.IndexMergeTool.Options;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.PrintStreamInfoStream;
+
+public class TestIndexMergeTool extends LuceneTestCase {
+
+  public void testNoParameters() throws Exception {
+    expectThrows(IllegalArgumentException.class, () -> {
+      Options.parse(new String[] {});
+    });
+  }
+
+  public void testOneParameter() throws Exception {
+    expectThrows(IllegalArgumentException.class, () -> {
+      Options.parse(new String[] { "target" });
+    });
+  }
+
+  public void testTwoParameters() throws Exception {
+    expectThrows(IllegalArgumentException.class, () -> {
+      Options.parse(new String[] { "target", "source1" });
+    });
+  }
+
+  public void testThreeParameters() throws Exception {
+    Options options = Options.parse(new String[] { "target", "source1", "source2" });
+    assertEquals("target", options.mergedIndexPath);
+    assertArrayEquals(new String[] { "source1", "source2" }, options.indexPaths);
+  }
+
+  public void testVerboseOption() throws Exception {
+    Options options = Options.parse(new String[] { "-verbose", "target", "source1", "source2" });
+    assertEquals(PrintStreamInfoStream.class, options.config.getInfoStream().getClass());
+  }
+
+  public void testMergePolicyOption() throws Exception {
+    Options options = Options.parse(new String[] { "-merge-policy", LogDocMergePolicy.class.getName(), "target", "source1", "source2" });
+    assertEquals(LogDocMergePolicy.class, options.config.getMergePolicy().getClass());
+  }
+
+  public void testMaxSegmentsOption() throws Exception {
+    Options options = Options.parse(new String[] { "-max-segments", "42", "target", "source1", "source2" });
+    assertEquals(42, options.maxSegments);
+  }
+
+}