You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/05/09 18:17:41 UTC

svn commit: r1101088 - in /lucene/dev/trunk/lucene: ./ src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/

Author: uschindler
Date: Mon May  9 16:17:40 2011
New Revision: 1101088

URL: http://svn.apache.org/viewvc?rev=1101088&view=rev
Log:
LUCENE-3082: Added index upgrade tool oal.index.IndexUpgrader that allows upgrading all segments to the most recent supported index format without fully optimizing

Added:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java   (with props)
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java   (with props)
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.31.optimized.cfs.zip   (with props)
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.31.optimized.nocfs.zip   (with props)
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1101088&r1=1101087&r2=1101088&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon May  9 16:17:40 2011
@@ -484,6 +484,12 @@ Changes in runtime behavior
   returns NumericField instances. (Uwe Schindler, Ryan McKinley,
   Mike McCandless)
 
+New features
+
+* LUCENE-3082: Added index upgrade tool oal.index.IndexUpgrader
+  that allows upgrading all segments to the most recent supported index
+  format without fully optimizing.  (Uwe Schindler, Mike McCandless)
+
 Optimizations
 
 * LUCENE-2990: ArrayUtil/CollectionUtil.*Sort() methods now exit early

Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java?rev=1101088&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java Mon May  9 16:17:40 2011
@@ -0,0 +1,129 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.Version;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Collection;
+
+/**
+  * This is an easy-to-use tool that upgrades all segments of an index from previous Lucene versions
+  * to the current segment file format. It can be used from the command line:
+  * <pre>
+  *  java -cp lucene-core.jar org.apache.lucene.index.IndexUpgrader [-delete-prior-commits] [-verbose] indexDir
+  * </pre>
+  * Alternatively this class can be instantiated and {@link #upgrade} invoked. It uses {@link UpgradeIndexMergePolicy}
+  * and triggers the upgrade via an optimize request to {@link IndexWriter}.
+  * <p>This tool keeps only the last commit in an index; for this
+  * reason, if the incoming index has more than one commit, the tool
+  * refuses to run by default. Specify {@code -delete-prior-commits}
+  * to override this, allowing the tool to delete all but the last commit.
+  * From Java code this can be enabled by passing {@code true} to
+  * {@link #IndexUpgrader(Directory,PrintStream,boolean)}.
+  */
+public final class IndexUpgrader {
+
+  private static void printUsage() {
+    System.err.println("Upgrades an index so all segments created with a previous Lucene version are rewritten.");
+    System.err.println("Usage:");
+    System.err.println("  java " + IndexUpgrader.class.getName() + " [-delete-prior-commits] [-verbose] indexDir");
+    System.err.println("This tool keeps only the last commit in an index; for this");
+    System.err.println("reason, if the incoming index has more than one commit, the tool");
+    System.err.println("refuses to run by default. Specify -delete-prior-commits to override");
+    System.err.println("this, allowing the tool to delete all but the last commit.");
+    System.exit(1);
+  }
+
+  public static void main(String[] args) throws IOException {
+    String dir = null;
+    boolean deletePriorCommits = false;
+    PrintStream out = null;
+    for (String arg : args) {
+      if ("-delete-prior-commits".equals(arg)) {
+        deletePriorCommits = true;
+      } else if ("-verbose".equals(arg)) {
+        out = System.out;
+      } else if (dir == null) {
+        dir = arg;
+      } else {
+        printUsage();
+      }
+    }
+    if (dir == null) {
+      printUsage();
+    }
+    
+    new IndexUpgrader(FSDirectory.open(new File(dir)), out, deletePriorCommits).upgrade();
+  }
+  
+  private final Directory dir;
+  private final PrintStream infoStream;
+  private final IndexWriterConfig iwc;
+  private final boolean deletePriorCommits;
+  
+  @SuppressWarnings("deprecation")
+  public IndexUpgrader(Directory dir) {
+    this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), null, false);
+  }
+  
+  @SuppressWarnings("deprecation")
+  public IndexUpgrader(Directory dir, PrintStream infoStream, boolean deletePriorCommits) {
+    this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), infoStream, deletePriorCommits);
+  }
+  
+  public IndexUpgrader(Directory dir, IndexWriterConfig iwc, PrintStream infoStream, boolean deletePriorCommits) {
+    this.dir = dir;
+    this.iwc = iwc;
+    this.infoStream = infoStream;
+    this.deletePriorCommits = deletePriorCommits;
+  }
+  
+  public void upgrade() throws IOException {
+    if (!IndexReader.indexExists(dir)) {
+      throw new IndexNotFoundException(dir.toString());
+    }
+  
+    if (!deletePriorCommits) {
+      final Collection<IndexCommit> commits = IndexReader.listCommits(dir);
+      if (commits.size() > 1) {
+        throw new IllegalArgumentException("This tool was invoked to not delete prior commit points, but the following commits were found: " + commits);
+      }
+    }
+    
+    final IndexWriterConfig c = (IndexWriterConfig) iwc.clone();
+    c.setMergePolicy(new UpgradeIndexMergePolicy(c.getMergePolicy()));
+    c.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
+    
+    final IndexWriter w = new IndexWriter(dir, c);
+    try {
+      w.setInfoStream(infoStream);
+      w.message("Upgrading all pre-" + Constants.LUCENE_MAIN_VERSION + " segments of index directory '" + dir + "' to version " + Constants.LUCENE_MAIN_VERSION + "...");
+      w.optimize();
+      w.message("All segments upgraded to version " + Constants.LUCENE_MAIN_VERSION);
+    } finally {
+      w.close();
+    }
+  }
+  
+}

Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java?rev=1101088&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java Mon May  9 16:17:40 2011
@@ -0,0 +1,150 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.Constants;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+/** This {@link MergePolicy} is used for upgrading all existing segments of
+  * an index when calling {@link IndexWriter#optimize()}.
+  * All other methods delegate to the base {@code MergePolicy} given to the constructor.
+  * This allows for an as-cheap-as-possible upgrade of an older index by only upgrading segments that
+  * were created by previous Lucene versions. Optimize no longer really optimizes;
+  * it is just used to &quot;optimize&quot; older segment versions away.
+  * <p>In general one would use {@link IndexUpgrader}, but for a fully customizable upgrade,
+  * you can use this like any other {@code MergePolicy} and call {@link IndexWriter#optimize()}:
+  * <pre class="prettyprint lang-java">
+  *  IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_XX, new KeywordAnalyzer());
+  *  iwc.setMergePolicy(new UpgradeIndexMergePolicy(iwc.getMergePolicy()));
+  *  IndexWriter w = new IndexWriter(dir, iwc);
+  *  w.optimize();
+  *  w.close();
+  * </pre>
+  * @lucene.experimental
+  * @see IndexUpgrader
+  */
+public class UpgradeIndexMergePolicy extends MergePolicy {
+
+  protected final MergePolicy base;
+
+  /** Wrap the given {@link MergePolicy} and intercept optimize requests to
+   * only upgrade segments written with previous Lucene versions. */
+  public UpgradeIndexMergePolicy(MergePolicy base) {
+    this.base = base;
+  }
+  
+  /** Returns whether the given segment should be upgraded. The default implementation
+   * will return {@code !Constants.LUCENE_MAIN_VERSION.equals(si.getVersion())},
+   * so all segments created with a different version number than this Lucene version will
+   * get upgraded.
+   */
+  protected boolean shouldUpgradeSegment(SegmentInfo si) {
+    return !Constants.LUCENE_MAIN_VERSION.equals(si.getVersion());
+  }
+
+  @Override
+  public void setIndexWriter(IndexWriter writer) {
+    super.setIndexWriter(writer);
+    base.setIndexWriter(writer);
+  }
+  
+  @Override
+  public MergeSpecification findMerges(SegmentInfos segmentInfos) throws CorruptIndexException, IOException {
+    return base.findMerges(segmentInfos);
+  }
+  
+  @Override
+  public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize) throws CorruptIndexException, IOException {
+    // first find all old segments
+    final HashSet<SegmentInfo> oldSegments = new HashSet<SegmentInfo>();
+    for (final SegmentInfo si : segmentInfos) {
+      if (segmentsToOptimize.contains(si) && shouldUpgradeSegment(si)) {
+        oldSegments.add(si);
+      }
+    }
+    
+    if (verbose()) message("findMergesForOptimize: segmentsToUpgrade=" + oldSegments);
+      
+    if (oldSegments.isEmpty())
+      return null;
+
+    MergeSpecification spec = base.findMergesForOptimize(segmentInfos, maxSegmentCount, oldSegments);    
+    
+    if (spec != null) {
+      // remove all segments that are in merge specification from oldSegments,
+      // the resulting set contains all segments that are left over
+      // and will be merged to one additional segment:
+      for (final OneMerge om : spec.merges) {
+        oldSegments.removeAll(om.segments);
+      }
+    }
+
+    if (!oldSegments.isEmpty()) {
+      if (verbose())
+        message("findMergesForOptimize: " +  base.getClass().getSimpleName() +
+        " does not want to merge all old segments, merge remaining ones into new segment: " + oldSegments);
+      final SegmentInfos newInfos = new SegmentInfos();
+      for (final SegmentInfo si : segmentInfos) {
+        if (oldSegments.contains(si)) {
+          newInfos.add(si);
+        }
+      }
+      // add the final merge
+      if (spec == null) {
+        spec = new MergeSpecification();
+      }
+      spec.add(new OneMerge(newInfos));
+    }
+
+    return spec;
+  }
+  
+  @Override
+  public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos) throws CorruptIndexException, IOException {
+    return base.findMergesToExpungeDeletes(segmentInfos);
+  }
+  
+  @Override
+  public boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment) throws IOException {
+    return base.useCompoundFile(segments, newSegment);
+  }
+  
+  @Override
+  public void close() {
+    base.close();
+  }
+  
+  @Override
+  public String toString() {
+    return "[" + getClass().getSimpleName() + "->" + base + "]";
+  }
+  
+  private boolean verbose() {
+    IndexWriter w = writer.get();
+    return w != null && w.verbose();
+  }
+
+  private void message(String message) {
+    if (verbose())
+      writer.get().message("UPGMP: " + message);
+  }
+  
+}

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?rev=1101088&r1=1101087&r2=1101088&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Mon May  9 16:17:40 2011
@@ -22,6 +22,7 @@ import java.io.File;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.util.Arrays;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Random;
 
@@ -41,10 +42,12 @@ import org.apache.lucene.search.Similari
 import org.apache.lucene.search.SimilarityProvider;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.Constants;
 
 /*
   Verify we can read the pre-4.0 file format, do searches
@@ -110,6 +113,10 @@ public class TestBackwardsCompatibility 
                                      "29.nocfs",
   };
   
+  final String[] oldOptimizedNames = {"31.optimized.cfs",
+                                      "31.optimized.nocfs",
+  };
+  
   /** This test checks that *only* IndexFormatTooOldExceptions are throws when you open and operate on too old indexes! */
   public void testUnsupportedOldIndexes() throws Exception {
     for(int i=0;i<unsupportedNames.length;i++) {
@@ -694,5 +701,98 @@ public class TestBackwardsCompatibility 
       _TestUtil.rmDir(oldIndexDir);
     }
   }
+  
+  private int checkAllSegmentsUpgraded(Directory dir) throws IOException {
+    final SegmentInfos infos = new SegmentInfos();
+    infos.read(dir);
+    if (VERBOSE) {
+      System.out.println("checkAllSegmentsUpgraded: " + infos);
+    }
+    for (SegmentInfo si : infos) {
+      assertEquals(Constants.LUCENE_MAIN_VERSION, si.getVersion());
+    }
+    return infos.size();
+  }
+  
+  private int getNumberOfSegments(Directory dir) throws IOException {
+    final SegmentInfos infos = new SegmentInfos();
+    infos.read(dir);
+    return infos.size();
+  }
+
+  public void testUpgradeOldIndex() throws Exception {
+    List<String> names = new ArrayList<String>(oldNames.length + oldOptimizedNames.length);
+    names.addAll(Arrays.asList(oldNames));
+    names.addAll(Arrays.asList(oldOptimizedNames));
+    for(String name : names) {
+      if (VERBOSE) {
+        System.out.println("testUpgradeOldIndex: index=" +name);
+      }
+      File oldIndxeDir = _TestUtil.getTempDir(name);
+      _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
+      Directory dir = newFSDirectory(oldIndxeDir);
+
+      new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false)
+        .upgrade();
+
+      checkAllSegmentsUpgraded(dir);
+
+      _TestUtil.checkIndex(dir);
+      
+      dir.close();
+      _TestUtil.rmDir(oldIndxeDir);
+    }
+  }
+
+  public void testUpgradeOldOptimizedIndexWithAdditions() throws Exception {
+    for (String name : oldOptimizedNames) {
+      if (VERBOSE) {
+        System.out.println("testUpgradeOldOptimizedIndexWithAdditions: index=" +name);
+      }
+      File oldIndxeDir = _TestUtil.getTempDir(name);
+      _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
+      Directory dir = newFSDirectory(oldIndxeDir);
+
+      assertEquals("Original index must be optimized", 1, getNumberOfSegments(dir));
+
+      // create a bunch of dummy segments
+      int id = 40;
+      RAMDirectory ramDir = new RAMDirectory();
+      for (int i = 0; i < 3; i++) {
+        // only use Log- or TieredMergePolicy, to make document addition predictable and not suddenly merge:
+        MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
+        IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
+          .setMergePolicy(mp);
+        IndexWriter w = new IndexWriter(ramDir, iwc);
+        // add a few more docs:
+        for(int j = 0; j < RANDOM_MULTIPLIER * random.nextInt(30); j++) {
+          addDoc(w, id++);
+        }
+        w.close(false);
+      }
+      
+      // add dummy segments (which are all in current version) to optimized index
+      MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
+      IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, null)
+        .setMergePolicy(mp);
+      IndexWriter w = new IndexWriter(dir, iwc);
+      w.setInfoStream(VERBOSE ? System.out : null);
+      w.addIndexes(ramDir);
+      w.close(false);
+      
+      // determine count of segments in modified index
+      final int origSegCount = getNumberOfSegments(dir);
+      
+      new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false)
+        .upgrade();
+
+      final int segCount = checkAllSegmentsUpgraded(dir);
+      assertEquals("Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged",
+        origSegCount, segCount);
+      
+      dir.close();
+      _TestUtil.rmDir(oldIndxeDir);
+    }
+  }
 
 }

Added: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.31.optimized.cfs.zip
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.31.optimized.cfs.zip?rev=1101088&view=auto
==============================================================================
Binary file - no diff available.

Added: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.31.optimized.nocfs.zip
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.31.optimized.nocfs.zip?rev=1101088&view=auto
==============================================================================
Binary file - no diff available.