You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/05/09 18:17:41 UTC
svn commit: r1101088 - in /lucene/dev/trunk/lucene: ./
src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/
Author: uschindler
Date: Mon May 9 16:17:40 2011
New Revision: 1101088
URL: http://svn.apache.org/viewvc?rev=1101088&view=rev
Log:
LUCENE-3082: Added index upgrade tool oal.index.IndexUpgrader that allows upgrading all segments to the most recent supported index format without fully optimizing
Added:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java (with props)
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java (with props)
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.31.optimized.cfs.zip (with props)
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.31.optimized.nocfs.zip (with props)
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1101088&r1=1101087&r2=1101088&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon May 9 16:17:40 2011
@@ -484,6 +484,12 @@ Changes in runtime behavior
returns NumericField instances. (Uwe Schindler, Ryan McKinley,
Mike McCandless)
+New features
+
+* LUCENE-3082: Added index upgrade tool oal.index.IndexUpgrader
+ that allows upgrading all segments to the most recent supported index
+ format without fully optimizing. (Uwe Schindler, Mike McCandless)
+
Optimizations
* LUCENE-2990: ArrayUtil/CollectionUtil.*Sort() methods now exit early
Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java?rev=1101088&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java Mon May 9 16:17:40 2011
@@ -0,0 +1,129 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.Version;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Collection;
+
+/**
+ * This is an easy-to-use tool that upgrades all segments of an index from previous Lucene versions
+ * to the current segment file format. It can be used from the command line:
+ * <pre>
+ * java -cp lucene-core.jar org.apache.lucene.index.IndexUpgrader [-delete-prior-commits] [-verbose] indexDir
+ * </pre>
+ * Alternatively this class can be instantiated and {@link #upgrade} invoked. It uses {@link UpgradeIndexMergePolicy}
+ * and triggers the upgrade via an optimize request to {@link IndexWriter}.
+ * <p>This tool keeps only the last commit in an index; for this
+ * reason, if the incoming index has more than one commit, the tool
+ * refuses to run by default. Specify {@code -delete-prior-commits}
+ * to override this, allowing the tool to delete all but the last commit.
+ * From Java code this can be enabled by passing {@code true} to
+ * {@link #IndexUpgrader(Directory,PrintStream,boolean)}.
+ */
+public final class IndexUpgrader {
+
+ /**
+ * Prints the command-line usage text to {@code System.err} and then
+ * terminates the JVM with exit status 1; this method does not return.
+ */
+ private static void printUsage() {
+ System.err.println("Upgrades an index so all segments created with a previous Lucene version are rewritten.");
+ System.err.println("Usage:");
+ System.err.println(" java " + IndexUpgrader.class.getName() + " [-delete-prior-commits] [-verbose] indexDir");
+ System.err.println("This tool keeps only the last commit in an index; for this");
+ System.err.println("reason, if the incoming index has more than one commit, the tool");
+ System.err.println("refuses to run by default. Specify -delete-prior-commits to override");
+ System.err.println("this, allowing the tool to delete all but the last commit.");
+ System.exit(1);
+ }
+
+ /**
+ * Command-line entry point. Recognizes the options {@code -delete-prior-commits}
+ * and {@code -verbose}; the first argument that is neither of these is taken as
+ * the index directory. A second such argument, or no directory at all, prints
+ * the usage text and exits.
+ *
+ * @param args command-line arguments
+ * @throws IOException propagated from opening the directory or from {@link #upgrade}
+ */
+ public static void main(String[] args) throws IOException {
+ String dir = null;
+ boolean deletePriorCommits = false;
+ // stays null unless -verbose is given; null is passed on as the info stream
+ PrintStream out = null;
+ for (String arg : args) {
+ if ("-delete-prior-commits".equals(arg)) {
+ deletePriorCommits = true;
+ } else if ("-verbose".equals(arg)) {
+ out = System.out;
+ } else if (dir == null) {
+ dir = arg;
+ } else {
+ // a directory was already given: more than one positional argument
+ printUsage();
+ }
+ }
+ if (dir == null) {
+ printUsage();
+ }
+
+ // NOTE(review): the Directory opened here is never closed in this method.
+ new IndexUpgrader(FSDirectory.open(new File(dir)), out, deletePriorCommits).upgrade();
+ }
+
+ // directory containing the index to upgrade
+ private final Directory dir;
+ // handed to IndexWriter.setInfoStream in upgrade(); may be null
+ private final PrintStream infoStream;
+ // configuration template; upgrade() clones it before changing policies
+ private final IndexWriterConfig iwc;
+ // if false, upgrade() refuses to run on an index with more than one commit
+ private final boolean deletePriorCommits;
+
+ /**
+ * Creates an upgrader for the given directory with an
+ * {@link IndexWriterConfig} built from {@code Version.LUCENE_CURRENT} and a
+ * {@code null} analyzer, no info stream, and deletion of prior commits disabled.
+ *
+ * @param dir directory containing the index to upgrade
+ */
+ @SuppressWarnings("deprecation")
+ public IndexUpgrader(Directory dir) {
+ this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), null, false);
+ }
+
+ /**
+ * Creates an upgrader for the given directory with an
+ * {@link IndexWriterConfig} built from {@code Version.LUCENE_CURRENT} and a
+ * {@code null} analyzer.
+ *
+ * @param dir directory containing the index to upgrade
+ * @param infoStream stream set on the {@link IndexWriter} as info stream; may be {@code null}
+ * @param deletePriorCommits {@code true} to allow running on an index that has
+ * more than one commit
+ */
+ @SuppressWarnings("deprecation")
+ public IndexUpgrader(Directory dir, PrintStream infoStream, boolean deletePriorCommits) {
+ this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), infoStream, deletePriorCommits);
+ }
+
+ /**
+ * Creates an upgrader for the given directory using a caller-supplied
+ * {@link IndexWriterConfig}. The config is only stored here; {@link #upgrade}
+ * works on a clone of it.
+ *
+ * @param dir directory containing the index to upgrade
+ * @param iwc configuration used as the basis for the upgrading {@link IndexWriter}
+ * @param infoStream stream set on the {@link IndexWriter} as info stream; may be {@code null}
+ * @param deletePriorCommits {@code true} to allow running on an index that has
+ * more than one commit
+ */
+ public IndexUpgrader(Directory dir, IndexWriterConfig iwc, PrintStream infoStream, boolean deletePriorCommits) {
+ this.dir = dir;
+ this.iwc = iwc;
+ this.infoStream = infoStream;
+ this.deletePriorCommits = deletePriorCommits;
+ }
+
+ /**
+ * Runs the upgrade: opens an {@link IndexWriter} whose merge policy is wrapped
+ * in {@link UpgradeIndexMergePolicy} and whose deletion policy is
+ * {@link KeepOnlyLastCommitDeletionPolicy}, calls {@link IndexWriter#optimize()},
+ * and closes the writer.
+ *
+ * @throws IndexNotFoundException if {@code IndexReader.indexExists} reports no
+ * index in the directory
+ * @throws IllegalArgumentException if deletion of prior commits was not enabled
+ * and the directory contains more than one commit
+ * @throws IOException propagated from the underlying index operations
+ */
+ public void upgrade() throws IOException {
+ if (!IndexReader.indexExists(dir)) {
+ throw new IndexNotFoundException(dir.toString());
+ }
+
+ // the writer below keeps only the last commit, so refuse to run on a
+ // multi-commit index unless the caller explicitly allowed deleting them
+ if (!deletePriorCommits) {
+ final Collection<IndexCommit> commits = IndexReader.listCommits(dir);
+ if (commits.size() > 1) {
+ throw new IllegalArgumentException("This tool was invoked to not delete prior commit points, but the following commits were found: " + commits);
+ }
+ }
+
+ // set the policies on a clone rather than on the config passed to the constructor
+ final IndexWriterConfig c = (IndexWriterConfig) iwc.clone();
+ c.setMergePolicy(new UpgradeIndexMergePolicy(c.getMergePolicy()));
+ c.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
+
+ final IndexWriter w = new IndexWriter(dir, c);
+ try {
+ w.setInfoStream(infoStream);
+ w.message("Upgrading all pre-" + Constants.LUCENE_MAIN_VERSION + " segments of index directory '" + dir + "' to version " + Constants.LUCENE_MAIN_VERSION + "...");
+ // with UpgradeIndexMergePolicy installed, optimize is the trigger for the upgrade merges
+ w.optimize();
+ w.message("All segments upgraded to version " + Constants.LUCENE_MAIN_VERSION);
+ } finally {
+ // close the writer whether or not the optimize succeeded
+ w.close();
+ }
+ }
+
+}
Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java?rev=1101088&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java Mon May 9 16:17:40 2011
@@ -0,0 +1,150 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.Constants;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+/** This {@link MergePolicy} is used for upgrading all existing segments of
+ * an index when calling {@link IndexWriter#optimize()}.
+ * All other methods delegate to the base {@code MergePolicy} given to the constructor.
+ * This allows for an as-cheap-as-possible upgrade of an older index by only upgrading segments that
+ * were created by previous Lucene versions. Optimize no longer really optimizes;
+ * it is just used to "optimize" older segment versions away.
+ * <p>In general one would use {@link IndexUpgrader}, but for a fully customizable upgrade,
+ * you can use this like any other {@code MergePolicy} and call {@link IndexWriter#optimize()}:
+ * <pre class="prettyprint lang-java">
+ * IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_XX, new KeywordAnalyzer());
+ * iwc.setMergePolicy(new UpgradeIndexMergePolicy(iwc.getMergePolicy()));
+ * IndexWriter w = new IndexWriter(dir, iwc);
+ * w.optimize();
+ * w.close();
+ * </pre>
+ * @lucene.experimental
+ * @see IndexUpgrader
+ */
+public class UpgradeIndexMergePolicy extends MergePolicy {
+
+ /** The wrapped policy that all non-optimize requests are delegated to. */
+ protected final MergePolicy base;
+
+ /** Wrap the given {@link MergePolicy} and intercept optimize requests to
+ * only upgrade segments written with previous Lucene versions. */
+ public UpgradeIndexMergePolicy(MergePolicy base) {
+ this.base = base;
+ }
+
+ /** Returns whether the given segment should be upgraded. The default implementation
+ * will return {@code !Constants.LUCENE_MAIN_VERSION.equals(si.getVersion())},
+ * so all segments created with a different version number than this Lucene version will
+ * get upgraded.
+ */
+ protected boolean shouldUpgradeSegment(SegmentInfo si) {
+ return !Constants.LUCENE_MAIN_VERSION.equals(si.getVersion());
+ }
+
+ /** Sets the writer on this policy and on the wrapped base policy. */
+ @Override
+ public void setIndexWriter(IndexWriter writer) {
+ super.setIndexWriter(writer);
+ base.setIndexWriter(writer);
+ }
+
+ /** Delegates unchanged to the base policy. */
+ @Override
+ public MergeSpecification findMerges(SegmentInfos segmentInfos) throws CorruptIndexException, IOException {
+ return base.findMerges(segmentInfos);
+ }
+
+ /** Restricts an optimize request to the segments that need upgrading.
+ * <p>Of the segments in {@code segmentsToOptimize}, only those for which
+ * {@link #shouldUpgradeSegment} returns {@code true} are passed on to the base
+ * policy's {@code findMergesForOptimize}. Any such segment that the base policy
+ * did not include in one of its merges is put, together with the other left-over
+ * ones, into one additional merge.
+ * @return the merges to run, or {@code null} if no segment needs upgrading
+ */
+ @Override
+ public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize) throws CorruptIndexException, IOException {
+ // first find all old segments
+ final HashSet<SegmentInfo> oldSegments = new HashSet<SegmentInfo>();
+ for (final SegmentInfo si : segmentInfos) {
+ if (segmentsToOptimize.contains(si) && shouldUpgradeSegment(si)) {
+ oldSegments.add(si);
+ }
+ }
+
+ if (verbose()) message("findMergesForOptimize: segmentsToUpgrade=" + oldSegments);
+
+ // nothing to upgrade: do not ask the base policy at all
+ if (oldSegments.isEmpty())
+ return null;
+
+ // let the base policy decide how to merge, but only offer it the old segments
+ MergeSpecification spec = base.findMergesForOptimize(segmentInfos, maxSegmentCount, oldSegments);
+
+ if (spec != null) {
+ // remove all segments that are in merge specification from oldSegments,
+ // the resulting set contains all segments that are left over
+ // and will be merged to one additional segment:
+ for (final OneMerge om : spec.merges) {
+ oldSegments.removeAll(om.segments);
+ }
+ }
+
+ if (!oldSegments.isEmpty()) {
+ if (verbose())
+ message("findMergesForOptimize: " + base.getClass().getSimpleName() +
+ " does not want to merge all old segments, merge remaining ones into new segment: " + oldSegments);
+ // collect the left-over segments by iterating segmentInfos, so they are
+ // added in the order they appear there rather than in HashSet order
+ final SegmentInfos newInfos = new SegmentInfos();
+ for (final SegmentInfo si : segmentInfos) {
+ if (oldSegments.contains(si)) {
+ newInfos.add(si);
+ }
+ }
+ // add the final merge
+ if (spec == null) {
+ spec = new MergeSpecification();
+ }
+ spec.add(new OneMerge(newInfos));
+ }
+
+ return spec;
+ }
+
+ /** Delegates unchanged to the base policy. */
+ @Override
+ public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos) throws CorruptIndexException, IOException {
+ return base.findMergesToExpungeDeletes(segmentInfos);
+ }
+
+ /** Delegates unchanged to the base policy. */
+ @Override
+ public boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment) throws IOException {
+ return base.useCompoundFile(segments, newSegment);
+ }
+
+ /** Closes the base policy. */
+ @Override
+ public void close() {
+ base.close();
+ }
+
+ /** Returns this class' simple name followed by the base policy, in the form {@code [Name->base]}. */
+ @Override
+ public String toString() {
+ return "[" + getClass().getSimpleName() + "->" + base + "]";
+ }
+
+ /** Returns {@code true} if a writer is currently available and reports itself as verbose. */
+ private boolean verbose() {
+ // NOTE(review): 'writer' is not declared in this class; presumably a field inherited from MergePolicy
+ IndexWriter w = writer.get();
+ return w != null && w.verbose();
+ }
+
+ /** Sends the message, prefixed with {@code "UPGMP: "}, to the writer if {@link #verbose()} is true. */
+ private void message(String message) {
+ if (verbose())
+ writer.get().message("UPGMP: " + message);
+ }
+
+}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?rev=1101088&r1=1101087&r2=1101088&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Mon May 9 16:17:40 2011
@@ -22,6 +22,7 @@ import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Arrays;
+import java.util.ArrayList;
import java.util.List;
import java.util.Random;
@@ -41,10 +42,12 @@ import org.apache.lucene.search.Similari
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.Constants;
/*
Verify we can read the pre-4.0 file format, do searches
@@ -110,6 +113,10 @@ public class TestBackwardsCompatibility
"29.nocfs",
};
+ // Names of the old, optimized test indexes; each is loaded from the data file "index.<name>.zip".
+ final String[] oldOptimizedNames = {"31.optimized.cfs",
+ "31.optimized.nocfs",
+ };
+
/** This test checks that *only* IndexFormatTooOldExceptions are throws when you open and operate on too old indexes! */
public void testUnsupportedOldIndexes() throws Exception {
for(int i=0;i<unsupportedNames.length;i++) {
@@ -694,5 +701,98 @@ public class TestBackwardsCompatibility
_TestUtil.rmDir(oldIndexDir);
}
}
+
+ /**
+ * Reads the segment infos of the given directory and asserts that every
+ * segment reports {@code Constants.LUCENE_MAIN_VERSION} as its version.
+ *
+ * @param dir directory containing the index to check
+ * @return the number of segments in the index
+ */
+ private int checkAllSegmentsUpgraded(Directory dir) throws IOException {
+ final SegmentInfos infos = new SegmentInfos();
+ infos.read(dir);
+ if (VERBOSE) {
+ System.out.println("checkAllSegmentsUpgraded: " + infos);
+ }
+ for (SegmentInfo si : infos) {
+ assertEquals(Constants.LUCENE_MAIN_VERSION, si.getVersion());
+ }
+ return infos.size();
+ }
+
+ /**
+ * Reads the segment infos of the given directory and returns how many
+ * segments the index contains.
+ *
+ * @param dir directory containing the index
+ * @return the number of segments in the index
+ */
+ private int getNumberOfSegments(Directory dir) throws IOException {
+ final SegmentInfos infos = new SegmentInfos();
+ infos.read(dir);
+ return infos.size();
+ }
+
+ /**
+ * Runs {@link IndexUpgrader} over every index named in {@code oldNames} and
+ * {@code oldOptimizedNames} and verifies that afterwards all segments carry
+ * the current version and the index passes {@code _TestUtil.checkIndex}.
+ */
+ public void testUpgradeOldIndex() throws Exception {
+ List<String> names = new ArrayList<String>(oldNames.length + oldOptimizedNames.length);
+ names.addAll(Arrays.asList(oldNames));
+ names.addAll(Arrays.asList(oldOptimizedNames));
+ for(String name : names) {
+ if (VERBOSE) {
+ System.out.println("testUpgradeOldIndex: index=" +name);
+ }
+ // NOTE(review): "oldIndxeDir" looks like a typo for "oldIndexDir"
+ File oldIndxeDir = _TestUtil.getTempDir(name);
+ _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
+ Directory dir = newFSDirectory(oldIndxeDir);
+
+ // deletePriorCommits=false: the upgrader throws if the index has more than one commit
+ new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false)
+ .upgrade();
+
+ checkAllSegmentsUpgraded(dir);
+
+ _TestUtil.checkIndex(dir);
+
+ dir.close();
+ _TestUtil.rmDir(oldIndxeDir);
+ }
+ }
+
+ /**
+ * For each index in {@code oldOptimizedNames}: adds segments written with the
+ * current version to the old single-segment index, runs {@link IndexUpgrader},
+ * and verifies that all segments carry the current version while the number of
+ * segments is unchanged.
+ */
+ public void testUpgradeOldOptimizedIndexWithAdditions() throws Exception {
+ for (String name : oldOptimizedNames) {
+ if (VERBOSE) {
+ System.out.println("testUpgradeOldOptimizedIndexWithAdditions: index=" +name);
+ }
+ // NOTE(review): "oldIndxeDir" looks like a typo for "oldIndexDir"
+ File oldIndxeDir = _TestUtil.getTempDir(name);
+ _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
+ Directory dir = newFSDirectory(oldIndxeDir);
+
+ assertEquals("Original index must be optimized", 1, getNumberOfSegments(dir));
+
+ // create a bunch of dummy segments
+ int id = 40;
+ // NOTE(review): ramDir is never closed in this method
+ RAMDirectory ramDir = new RAMDirectory();
+ for (int i = 0; i < 3; i++) {
+ // only use Log- or TieredMergePolicy, to make document addition predictable and not suddenly merge:
+ MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
+ IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
+ .setMergePolicy(mp);
+ IndexWriter w = new IndexWriter(ramDir, iwc);
+ // add few more docs:
+ // NOTE(review): the loop bound calls random.nextInt(30) on every iteration,
+ // so the limit changes while looping and zero documents may be added
+ for(int j = 0; j < RANDOM_MULTIPLIER * random.nextInt(30); j++) {
+ addDoc(w, id++);
+ }
+ w.close(false);
+ }
+
+ // add dummy segments (which are all in current version) to optimized index
+ MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
+ IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, null)
+ .setMergePolicy(mp);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ w.setInfoStream(VERBOSE ? System.out : null);
+ w.addIndexes(ramDir);
+ w.close(false);
+
+ // determine count of segments in modified index
+ final int origSegCount = getNumberOfSegments(dir);
+
+ new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false)
+ .upgrade();
+
+ // only the single old segment should have been rewritten; the added
+ // current-version segments must be left alone, so the count stays the same
+ final int segCount = checkAllSegmentsUpgraded(dir);
+ assertEquals("Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged",
+ origSegCount, segCount);
+
+ dir.close();
+ _TestUtil.rmDir(oldIndxeDir);
+ }
+ }
}
Added: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.31.optimized.cfs.zip
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.31.optimized.cfs.zip?rev=1101088&view=auto
==============================================================================
Binary file - no diff available.
Added: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.31.optimized.nocfs.zip
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/index.31.optimized.nocfs.zip?rev=1101088&view=auto
==============================================================================
Binary file - no diff available.