You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2017/07/05 07:43:41 UTC

lucene-solr:master: LUCENE-7898: Remove hasSegID from SegmentInfos.

Repository: lucene-solr
Updated Branches:
  refs/heads/master 36bca198f -> 708462ede


LUCENE-7898: Remove hasSegID from SegmentInfos.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/708462ed
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/708462ed
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/708462ed

Branch: refs/heads/master
Commit: 708462eded31917be9805431ba169c8c81e89d67
Parents: 36bca19
Author: Adrien Grand <jp...@gmail.com>
Authored: Wed Jul 5 09:22:33 2017 +0200
Committer: Adrien Grand <jp...@gmail.com>
Committed: Wed Jul 5 09:43:10 2017 +0200

----------------------------------------------------------------------
 .../org/apache/lucene/index/SegmentInfos.java   | 54 ++++++--------------
 1 file changed, 16 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/708462ed/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
index e3761cc..a00a470 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
@@ -65,7 +65,7 @@ import org.apache.lucene.util.Version;
  * Files:
  * <ul>
  * <li><tt>segments_N</tt>: Header, LuceneVersion, Version, NameCounter, SegCount, MinSegmentLuceneVersion, &lt;SegName,
- * HasSegID, SegID, SegCodec, DelGen, DeletionCount, FieldInfosGen, DocValuesGen,
+ * SegID, SegCodec, DelGen, DeletionCount, FieldInfosGen, DocValuesGen,
  * UpdatesFiles&gt;<sup>SegCount</sup>, CommitUserData, Footer
  * </ul>
  * Data types:
@@ -78,7 +78,6 @@ import org.apache.lucene.util.Version;
  * {@link DataOutput#writeInt Int32}</li>
  * <li>Generation, Version, DelGen, Checksum, FieldInfosGen, DocValuesGen --&gt;
  * {@link DataOutput#writeLong Int64}</li>
- * <li>HasSegID --&gt; {@link DataOutput#writeByte Int8}</li>
  * <li>SegID --&gt; {@link DataOutput#writeByte Int8<sup>ID_LENGTH</sup>}</li>
  * <li>SegName, SegCodec --&gt; {@link DataOutput#writeString String}</li>
  * <li>CommitUserData --&gt; {@link DataOutput#writeMapOfStrings
@@ -100,9 +99,6 @@ import org.apache.lucene.util.Version;
  * <li>DeletionCount records the number of deleted documents in this segment.</li>
  * <li>SegCodec is the {@link Codec#getName() name} of the Codec that encoded
  * this segment.</li>
- * <li>HasSegID is nonzero if the segment has an identifier. Otherwise, when it is 0
- * the identifier is {@code null} and no SegID is written. Null only happens for Lucene
- * 4.x segments referenced in commits.</li>
  * <li>SegID is the identifier of the Codec that encoded this segment. </li>
  * <li>CommitUserData stores an optional user-supplied opaque
  * Map&lt;String,String&gt; that was passed to
@@ -345,17 +341,17 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
     long totalDocs = 0;
     for (int seg = 0; seg < numSegments; seg++) {
       String segName = input.readString();
-      final byte segmentID[];
-      byte hasID = input.readByte();
-      if (hasID == 1) {
-        segmentID = new byte[StringHelper.ID_LENGTH];
-        input.readBytes(segmentID, 0, segmentID.length);
-      } else if (hasID == 0) {
-        throw new IndexFormatTooOldException(input, "Segment is from Lucene 4.x");
-      } else {
-        throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input);
+      if (format < VERSION_70) {
+        byte hasID = input.readByte();
+        if (hasID == 0) {
+          throw new IndexFormatTooOldException(input, "Segment is from Lucene 4.x");
+        } else if (hasID != 1) {
+          throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input);
+        }
       }
-      Codec codec = readCodec(input, format < VERSION_53);
+      byte[] segmentID = new byte[StringHelper.ID_LENGTH];
+      input.readBytes(segmentID, 0, segmentID.length);
+      Codec codec = readCodec(input);
       SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ);
       info.setCodec(codec);
       totalDocs += info.maxDoc();
@@ -409,24 +405,12 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
     return infos;
   }
 
-  private static final List<String> unsupportedCodecs = Arrays.asList(
-      "Lucene3x", "Lucene40", "Lucene41", "Lucene42", "Lucene45", "Lucene46", "Lucene49", "Lucene410"
-  );
-
-  private static Codec readCodec(DataInput input, boolean unsupportedAllowed) throws IOException {
+  private static Codec readCodec(DataInput input) throws IOException {
     final String name = input.readString();
     try {
       return Codec.forName(name);
     } catch (IllegalArgumentException e) {
-      // give better error messages if we can, first check if this is a legacy codec
-      if (unsupportedCodecs.contains(name)) {
-        // We should only get here on pre-5.3 indices, but we can't test this until 7.0 when 5.x indices become too old:
-        assert unsupportedAllowed;
-        IOException newExc = new IndexFormatTooOldException(input, "Codec '" + name + "' is too old");
-        newExc.initCause(e);
-        throw newExc;
-      }
-      // or maybe it's an old default codec that moved
+      // maybe it's an old default codec that moved
       if (name.startsWith("Lucene")) {
         throw new IllegalArgumentException("Could not load codec '" + name + "'.  Did you forget to add lucene-backward-codecs.jar?", e);
       }
@@ -523,16 +507,10 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
       }
       out.writeString(si.name);
       byte segmentID[] = si.getId();
-      // TODO: remove this in lucene 6, we don't need to include 4.x segments in commits anymore
-      if (segmentID == null) {
-        out.writeByte((byte)0);
-      } else {
-        if (segmentID.length != StringHelper.ID_LENGTH) {
-          throw new IllegalStateException("cannot write segment: invalid id segment=" + si.name + "id=" + StringHelper.idToString(segmentID));
-        }
-        out.writeByte((byte)1);
-        out.writeBytes(segmentID, segmentID.length);
+      if (segmentID.length != StringHelper.ID_LENGTH) {
+        throw new IllegalStateException("cannot write segment: invalid id segment=" + si.name + "id=" + StringHelper.idToString(segmentID));
       }
+      out.writeBytes(segmentID, segmentID.length);
       out.writeString(si.getCodec().getName());
       out.writeLong(siPerCommit.getDelGen());
       int delCount = siPerCommit.getDelCount();