You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2017/07/04 17:59:59 UTC

[09/23] lucene-solr:jira/solr-10879: Remove FixBrokenOffsets since 7.0+ indices cannot have broken offsets.

Remove FixBrokenOffsets since 7.0+ indices cannot have broken offsets.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/869a48cc
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/869a48cc
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/869a48cc

Branch: refs/heads/jira/solr-10879
Commit: 869a48ccacddd3ad9e1dac68eee6d66b78a7049b
Parents: e809e09
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Jul 3 14:12:25 2017 +0200
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Jul 3 14:12:25 2017 +0200

----------------------------------------------------------------------
 .../apache/lucene/index/FixBrokenOffsets.java   | 138 -------------------
 .../lucene/index/TestFixBrokenOffsets.java      | 112 ---------------
 2 files changed, 250 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/869a48cc/lucene/backward-codecs/src/java/org/apache/lucene/index/FixBrokenOffsets.java
----------------------------------------------------------------------
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/index/FixBrokenOffsets.java b/lucene/backward-codecs/src/java/org/apache/lucene/index/FixBrokenOffsets.java
deleted file mode 100644
index 9b3615e..0000000
--- a/lucene/backward-codecs/src/java/org/apache/lucene/index/FixBrokenOffsets.java
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.List;
-
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.SuppressForbidden;
-
-/**
- * Command-line tool that reads from a source index and
- * writes to a dest index, correcting any broken offsets
- * in the process.
- *
- * @lucene.experimental
- */
-public class FixBrokenOffsets {
-  public SegmentInfos infos;
-
-  FSDirectory fsDir;
-
-  Path dir;
-
-  @SuppressForbidden(reason = "System.out required: command line tool")
-  public static void main(String[] args) throws IOException {
-    if (args.length < 2) {
-      System.err.println("Usage: FixBrokenOffsetse <srcDir> <destDir>");
-      return;
-    }
-    Path srcPath = Paths.get(args[0]);
-    if (!Files.exists(srcPath)) {
-      throw new RuntimeException("srcPath " + srcPath.toAbsolutePath() + " doesn't exist");
-    }
-    Path destPath = Paths.get(args[1]);
-    if (Files.exists(destPath)) {
-      throw new RuntimeException("destPath " + destPath.toAbsolutePath() + " already exists; please remove it and re-run");
-    }
-    Directory srcDir = FSDirectory.open(srcPath);
-    DirectoryReader reader = DirectoryReader.open(srcDir);
-
-    List<LeafReaderContext> leaves = reader.leaves();
-    CodecReader[] filtered = new CodecReader[leaves.size()];
-    for(int i=0;i<leaves.size();i++) {
-      filtered[i] = SlowCodecReaderWrapper.wrap(new FilterLeafReader(leaves.get(i).reader()) {
-          @Override
-          public Fields getTermVectors(int docID) throws IOException {
-            Fields termVectors = in.getTermVectors(docID);
-            if (termVectors == null) {
-              return null;
-            }
-            return new FilterFields(termVectors) {
-              @Override
-              public Terms terms(String field) throws IOException {
-                return new FilterTerms(super.terms(field)) {
-                  @Override
-                  public TermsEnum iterator() throws IOException {
-                    return new FilterTermsEnum(super.iterator()) {
-                      @Override
-                      public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
-                        return new FilterPostingsEnum(super.postings(reuse, flags)) {
-                          int nextLastStartOffset = 0;
-                          int lastStartOffset = 0;
-
-                          @Override
-                          public int nextPosition() throws IOException {
-                            int pos = super.nextPosition();
-                            lastStartOffset = nextLastStartOffset;
-                            nextLastStartOffset = startOffset();
-                            return pos;
-                          }
-                          
-                          @Override
-                          public int startOffset() throws IOException {
-                            int offset = super.startOffset();
-                            if (offset < lastStartOffset) {
-                              offset = lastStartOffset;
-                            }
-                            return offset;
-                          }
-                          
-                          @Override
-                          public int endOffset() throws IOException {
-                            int offset = super.endOffset();
-                            if (offset < lastStartOffset) {
-                              offset = lastStartOffset;
-                            }
-                            return offset;
-                          }
-                        };
-                      }
-                    };
-                  }
-                };
-              }
-            };
-          }
-
-          @Override
-          public CacheHelper getCoreCacheHelper() {
-            return null;
-          }
-
-          @Override
-          public CacheHelper getReaderCacheHelper() {
-            return null;
-          }
-        });
-    }
-
-    Directory destDir = FSDirectory.open(destPath);
-    // We need to maintain the same major version
-    int createdMajor = SegmentInfos.readLatestCommit(srcDir).getIndexCreatedVersionMajor();
-    new SegmentInfos(createdMajor).commit(destDir);
-    IndexWriter writer = new IndexWriter(destDir, new IndexWriterConfig());
-    writer.addIndexes(filtered);
-    IOUtils.close(writer, reader, srcDir, destDir);
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/869a48cc/lucene/backward-codecs/src/test/org/apache/lucene/index/TestFixBrokenOffsets.java
----------------------------------------------------------------------
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestFixBrokenOffsets.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestFixBrokenOffsets.java
deleted file mode 100644
index 46b30d3..0000000
--- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestFixBrokenOffsets.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.file.Path;
-import java.util.List;
-
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.store.MockDirectoryWrapper;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TestUtil;
-
-public class TestFixBrokenOffsets extends LuceneTestCase {
-
-  // Run this in Lucene 6.x:
-  //
-  //     ant test -Dtestcase=TestFixBrokenOffsets -Dtestmethod=testCreateBrokenOffsetsIndex -Dtests.codec=default -Dtests.useSecurityManager=false
-  /*
-  public void testCreateBrokenOffsetsIndex() throws IOException {
-
-    Path indexDir = Paths.get("/tmp/brokenoffsets");
-    Files.deleteIfExists(indexDir);
-    Directory dir = newFSDirectory(indexDir);
-    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
-
-    Document doc = new Document();
-    FieldType fieldType = new FieldType(TextField.TYPE_STORED);
-    fieldType.setStoreTermVectors(true);
-    fieldType.setStoreTermVectorPositions(true);
-    fieldType.setStoreTermVectorOffsets(true);
-    Field field = new Field("foo", "bar", fieldType);
-    field.setTokenStream(new CannedTokenStream(new Token("foo", 10, 13), new Token("foo", 7, 9)));
-    doc.add(field);
-    writer.addDocument(doc);
-    writer.commit();
-
-    // 2nd segment
-    doc = new Document();
-    field = new Field("foo", "bar", fieldType);
-    field.setTokenStream(new CannedTokenStream(new Token("bar", 15, 17), new Token("bar", 1, 5)));
-    doc.add(field);
-    writer.addDocument(doc);
-    
-    writer.close();
-
-    dir.close();
-  }
-  */
-
-  public void testFixBrokenOffsetsIndex() throws IOException {
-    InputStream resource = getClass().getResourceAsStream("index.630.brokenoffsets.zip");
-    assertNotNull("Broken offsets index not found", resource);
-    Path path = createTempDir("brokenoffsets");
-    TestUtil.unzip(resource, path);
-    Directory dir = newFSDirectory(path);
-
-    // OK: index is 6.3.0 so offsets not checked:
-    TestUtil.checkIndex(dir);
-    
-    MockDirectoryWrapper tmpDir = newMockDirectory();
-    tmpDir.setCheckIndexOnClose(false);
-    IndexWriter w = new IndexWriter(tmpDir, new IndexWriterConfig());
-    IndexWriter finalW = w;
-    IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> finalW.addIndexes(dir));
-    assertTrue(e.getMessage(), e.getMessage().startsWith("Cannot use addIndexes(Directory) with indexes that have been created by a different Lucene version."));
-    w.close();
-    // OK: addIndexes(Directory...) refuses to execute if the index creation version is different so broken offsets are not carried over
-    tmpDir.close();
-
-    final MockDirectoryWrapper tmpDir2 = newMockDirectory();
-    tmpDir2.setCheckIndexOnClose(false);
-    w = new IndexWriter(tmpDir2, new IndexWriterConfig());
-    DirectoryReader reader = DirectoryReader.open(dir);
-    List<LeafReaderContext> leaves = reader.leaves();
-    CodecReader[] codecReaders = new CodecReader[leaves.size()];
-    for(int i=0;i<leaves.size();i++) {
-      codecReaders[i] = (CodecReader) leaves.get(i).reader();
-    }
-    IndexWriter finalW2 = w;
-    e = expectThrows(IllegalArgumentException.class, () -> finalW2.addIndexes(codecReaders));
-    assertEquals("Cannot merge a segment that has been created with major version 6 into this index which has been created by major version 7", e.getMessage());
-    reader.close();
-    w.close();
-    tmpDir2.close();
-
-    // Now run the tool and confirm the broken offsets are fixed:
-    Path path2 = createTempDir("fixedbrokenoffsets").resolve("subdir");
-    FixBrokenOffsets.main(new String[] {path.toString(), path2.toString()});
-    Directory tmpDir3 = FSDirectory.open(path2);
-    TestUtil.checkIndex(tmpDir3);
-    tmpDir3.close();
-    
-    dir.close();
-  }
-}