You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2017/07/04 17:59:59 UTC
[09/23] lucene-solr:jira/solr-10879: Remove FixBrokenOffsets since 7.0+ indices cannot have broken offsets.
Remove FixBrokenOffsets since 7.0+ indices cannot have broken offsets.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/869a48cc
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/869a48cc
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/869a48cc
Branch: refs/heads/jira/solr-10879
Commit: 869a48ccacddd3ad9e1dac68eee6d66b78a7049b
Parents: e809e09
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Jul 3 14:12:25 2017 +0200
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Jul 3 14:12:25 2017 +0200
----------------------------------------------------------------------
.../apache/lucene/index/FixBrokenOffsets.java | 138 -------------------
.../lucene/index/TestFixBrokenOffsets.java | 112 ---------------
2 files changed, 250 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/869a48cc/lucene/backward-codecs/src/java/org/apache/lucene/index/FixBrokenOffsets.java
----------------------------------------------------------------------
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/index/FixBrokenOffsets.java b/lucene/backward-codecs/src/java/org/apache/lucene/index/FixBrokenOffsets.java
deleted file mode 100644
index 9b3615e..0000000
--- a/lucene/backward-codecs/src/java/org/apache/lucene/index/FixBrokenOffsets.java
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.List;
-
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.SuppressForbidden;
-
-/**
- * Command-line tool that reads from a source index and
- * writes to a dest index, correcting any broken offsets
- * in the process.
- *
- * @lucene.experimental
- */
-public class FixBrokenOffsets {
- public SegmentInfos infos;
-
- FSDirectory fsDir;
-
- Path dir;
-
- @SuppressForbidden(reason = "System.out required: command line tool")
- public static void main(String[] args) throws IOException {
- if (args.length < 2) {
- System.err.println("Usage: FixBrokenOffsetse <srcDir> <destDir>");
- return;
- }
- Path srcPath = Paths.get(args[0]);
- if (!Files.exists(srcPath)) {
- throw new RuntimeException("srcPath " + srcPath.toAbsolutePath() + " doesn't exist");
- }
- Path destPath = Paths.get(args[1]);
- if (Files.exists(destPath)) {
- throw new RuntimeException("destPath " + destPath.toAbsolutePath() + " already exists; please remove it and re-run");
- }
- Directory srcDir = FSDirectory.open(srcPath);
- DirectoryReader reader = DirectoryReader.open(srcDir);
-
- List<LeafReaderContext> leaves = reader.leaves();
- CodecReader[] filtered = new CodecReader[leaves.size()];
- for(int i=0;i<leaves.size();i++) {
- filtered[i] = SlowCodecReaderWrapper.wrap(new FilterLeafReader(leaves.get(i).reader()) {
- @Override
- public Fields getTermVectors(int docID) throws IOException {
- Fields termVectors = in.getTermVectors(docID);
- if (termVectors == null) {
- return null;
- }
- return new FilterFields(termVectors) {
- @Override
- public Terms terms(String field) throws IOException {
- return new FilterTerms(super.terms(field)) {
- @Override
- public TermsEnum iterator() throws IOException {
- return new FilterTermsEnum(super.iterator()) {
- @Override
- public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
- return new FilterPostingsEnum(super.postings(reuse, flags)) {
- int nextLastStartOffset = 0;
- int lastStartOffset = 0;
-
- @Override
- public int nextPosition() throws IOException {
- int pos = super.nextPosition();
- lastStartOffset = nextLastStartOffset;
- nextLastStartOffset = startOffset();
- return pos;
- }
-
- @Override
- public int startOffset() throws IOException {
- int offset = super.startOffset();
- if (offset < lastStartOffset) {
- offset = lastStartOffset;
- }
- return offset;
- }
-
- @Override
- public int endOffset() throws IOException {
- int offset = super.endOffset();
- if (offset < lastStartOffset) {
- offset = lastStartOffset;
- }
- return offset;
- }
- };
- }
- };
- }
- };
- }
- };
- }
-
- @Override
- public CacheHelper getCoreCacheHelper() {
- return null;
- }
-
- @Override
- public CacheHelper getReaderCacheHelper() {
- return null;
- }
- });
- }
-
- Directory destDir = FSDirectory.open(destPath);
- // We need to maintain the same major version
- int createdMajor = SegmentInfos.readLatestCommit(srcDir).getIndexCreatedVersionMajor();
- new SegmentInfos(createdMajor).commit(destDir);
- IndexWriter writer = new IndexWriter(destDir, new IndexWriterConfig());
- writer.addIndexes(filtered);
- IOUtils.close(writer, reader, srcDir, destDir);
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/869a48cc/lucene/backward-codecs/src/test/org/apache/lucene/index/TestFixBrokenOffsets.java
----------------------------------------------------------------------
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestFixBrokenOffsets.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestFixBrokenOffsets.java
deleted file mode 100644
index 46b30d3..0000000
--- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestFixBrokenOffsets.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.file.Path;
-import java.util.List;
-
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.store.MockDirectoryWrapper;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TestUtil;
-
-public class TestFixBrokenOffsets extends LuceneTestCase {
-
- // Run this in Lucene 6.x:
- //
- // ant test -Dtestcase=TestFixBrokenOffsets -Dtestmethod=testCreateBrokenOffsetsIndex -Dtests.codec=default -Dtests.useSecurityManager=false
- /*
- public void testCreateBrokenOffsetsIndex() throws IOException {
-
- Path indexDir = Paths.get("/tmp/brokenoffsets");
- Files.deleteIfExists(indexDir);
- Directory dir = newFSDirectory(indexDir);
- IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
-
- Document doc = new Document();
- FieldType fieldType = new FieldType(TextField.TYPE_STORED);
- fieldType.setStoreTermVectors(true);
- fieldType.setStoreTermVectorPositions(true);
- fieldType.setStoreTermVectorOffsets(true);
- Field field = new Field("foo", "bar", fieldType);
- field.setTokenStream(new CannedTokenStream(new Token("foo", 10, 13), new Token("foo", 7, 9)));
- doc.add(field);
- writer.addDocument(doc);
- writer.commit();
-
- // 2nd segment
- doc = new Document();
- field = new Field("foo", "bar", fieldType);
- field.setTokenStream(new CannedTokenStream(new Token("bar", 15, 17), new Token("bar", 1, 5)));
- doc.add(field);
- writer.addDocument(doc);
-
- writer.close();
-
- dir.close();
- }
- */
-
- public void testFixBrokenOffsetsIndex() throws IOException {
- InputStream resource = getClass().getResourceAsStream("index.630.brokenoffsets.zip");
- assertNotNull("Broken offsets index not found", resource);
- Path path = createTempDir("brokenoffsets");
- TestUtil.unzip(resource, path);
- Directory dir = newFSDirectory(path);
-
- // OK: index is 6.3.0 so offsets not checked:
- TestUtil.checkIndex(dir);
-
- MockDirectoryWrapper tmpDir = newMockDirectory();
- tmpDir.setCheckIndexOnClose(false);
- IndexWriter w = new IndexWriter(tmpDir, new IndexWriterConfig());
- IndexWriter finalW = w;
- IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> finalW.addIndexes(dir));
- assertTrue(e.getMessage(), e.getMessage().startsWith("Cannot use addIndexes(Directory) with indexes that have been created by a different Lucene version."));
- w.close();
- // OK: addIndexes(Directory...) refuses to execute if the index creation version is different so broken offsets are not carried over
- tmpDir.close();
-
- final MockDirectoryWrapper tmpDir2 = newMockDirectory();
- tmpDir2.setCheckIndexOnClose(false);
- w = new IndexWriter(tmpDir2, new IndexWriterConfig());
- DirectoryReader reader = DirectoryReader.open(dir);
- List<LeafReaderContext> leaves = reader.leaves();
- CodecReader[] codecReaders = new CodecReader[leaves.size()];
- for(int i=0;i<leaves.size();i++) {
- codecReaders[i] = (CodecReader) leaves.get(i).reader();
- }
- IndexWriter finalW2 = w;
- e = expectThrows(IllegalArgumentException.class, () -> finalW2.addIndexes(codecReaders));
- assertEquals("Cannot merge a segment that has been created with major version 6 into this index which has been created by major version 7", e.getMessage());
- reader.close();
- w.close();
- tmpDir2.close();
-
- // Now run the tool and confirm the broken offsets are fixed:
- Path path2 = createTempDir("fixedbrokenoffsets").resolve("subdir");
- FixBrokenOffsets.main(new String[] {path.toString(), path2.toString()});
- Directory tmpDir3 = FSDirectory.open(path2);
- TestUtil.checkIndex(tmpDir3);
- tmpDir3.close();
-
- dir.close();
- }
-}