You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/09/26 07:20:44 UTC

svn commit: r1627714 - in /lucene/dev/branches/lucene5969/lucene: backward-codecs/src/java/org/apache/lucene/codecs/lucene46/ backward-codecs/src/test/org/apache/lucene/codecs/lucene46/ codecs/src/java/org/apache/lucene/codecs/simpletext/ core/src/java...

Author: rmuir
Date: Fri Sep 26 05:20:43 2014
New Revision: 1627714

URL: http://svn.apache.org/r1627714
Log:
LUCENE-5969, LUCENE-5895: fix sign bit bugs in segment/commit IDs, use byte[] representation

Modified:
    lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java
    lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java
    lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java
    lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java
    lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java
    lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
    lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoReader.java
    lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoWriter.java
    lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
    lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
    lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
    lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java

Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java Fri Sep 26 05:20:43 2014
@@ -31,7 +31,7 @@ import org.apache.lucene.store.DataOutpu
  * <p>
  * Files:
  * <ul>
- *   <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Id, Footer
+ *   <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Footer
  * </ul>
  * </p>
  * Data types:
@@ -44,7 +44,6 @@ import org.apache.lucene.store.DataOutpu
  *   <li>Diagnostics --&gt; {@link DataOutput#writeStringStringMap Map&lt;String,String&gt;}</li>
  *   <li>IsCompoundFile --&gt; {@link DataOutput#writeByte Int8}</li>
  *   <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
- *   <li>Id --&gt; {@link DataOutput#writeString String}</li>
  * </ul>
  * </p>
  * Field Descriptions:
@@ -88,6 +87,5 @@ public class Lucene46SegmentInfoFormat e
   static final String CODEC_NAME = "Lucene46SegmentInfo";
   static final int VERSION_START = 0;
   static final int VERSION_CHECKSUM = 1;
-  static final int VERSION_ID = 2;
-  static final int VERSION_CURRENT = VERSION_ID;
+  static final int VERSION_CURRENT = VERSION_CHECKSUM;
 }

Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java Fri Sep 26 05:20:43 2014
@@ -65,13 +65,6 @@ public class Lucene46SegmentInfoReader e
       final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
       final Map<String,String> diagnostics = input.readStringStringMap();
       final Set<String> files = input.readStringSet();
-      
-      String id;
-      if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_ID) {
-        id = input.readString();
-      } else {
-        id = null;
-      }
 
       if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) {
         CodecUtil.checkFooter(input);
@@ -79,7 +72,7 @@ public class Lucene46SegmentInfoReader e
         CodecUtil.checkEOF(input);
       }
 
-      final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, id);
+      final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, null);
       si.setFiles(files);
 
       return si;

Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java Fri Sep 26 05:20:43 2014
@@ -64,7 +64,6 @@ public class Lucene46SegmentInfoWriter e
       output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
       output.writeStringStringMap(si.getDiagnostics());
       output.writeStringSet(si.files());
-      output.writeString(si.getId());
       CodecUtil.writeFooter(output);
       success = true;
     } finally {

Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java Fri Sep 26 05:20:43 2014
@@ -20,6 +20,7 @@ package org.apache.lucene.codecs.simplet
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.text.ParseException;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
@@ -109,7 +110,7 @@ public class SimpleTextSegmentInfoReader
       
       SimpleTextUtil.readLine(input, scratch);
       assert StringHelper.startsWith(scratch.get(), SI_ID);
-      final String id = readString(SI_ID.length, scratch);
+      final byte[] id = Arrays.copyOfRange(scratch.bytes(), SI_ID.length, scratch.length());
 
       SimpleTextUtil.checkFooter(input);
 

Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java Fri Sep 26 05:20:43 2014
@@ -107,7 +107,7 @@ public class SimpleTextSegmentInfoWriter
       }
 
       SimpleTextUtil.write(output, SI_ID);
-      SimpleTextUtil.write(output, si.getId(), scratch);
+      SimpleTextUtil.write(output, new BytesRef(si.getId()));
       SimpleTextUtil.writeNewline(output);
       
       SimpleTextUtil.writeChecksum(output, scratch);

Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java Fri Sep 26 05:20:43 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
 
 
 import java.io.IOException;
+import java.util.Arrays;
 
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.IndexFormatTooNewException;
@@ -31,6 +32,7 @@ import org.apache.lucene.store.IndexInpu
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * Utility class for reading and writing versioned headers.
@@ -94,12 +96,12 @@ public final class CodecUtil {
    * Writes a codec header for a per-segment, which records both a string to
    * identify the file, a version number, and the unique ID of the segment. 
    * This header can be parsed and validated with 
-   * {@link #checkSegmentHeader(DataInput, String, int, int, String) checkSegmentHeader()}.
+   * {@link #checkSegmentHeader(DataInput, String, int, int, byte[]) checkSegmentHeader()}.
    * <p>
    * CodecSegmentHeader --&gt; CodecHeader,SegmentID
    * <ul>
    *    <li>CodecHeader --&gt; {@link #writeHeader}
-   *    <li>SegmentID   --&gt; {@link DataOutput#writeString String}.
+   *    <li>SegmentID   --&gt; {@link DataOutput#writeByte byte}<sup>16</sup>.
    *        Unique identifier for the segment.
    * </ul>
    * <p>
@@ -113,13 +115,15 @@ public final class CodecUtil {
    * @param segmentID Unique identifier for the segment
    * @param version Version number
    * @throws IOException If there is an I/O error writing to the underlying medium.
-   * @throws IllegalArgumentException If the codec name is not simple ASCII, or is more than 127 characters in length
+   * @throws IllegalArgumentException If the codec name is not simple ASCII, or 
+   *         is more than 127 characters in length, or if segmentID is invalid.
    */
-  // nocommit: fix javadocs, add segmentLength()
-  public static void writeSegmentHeader(DataOutput out, String codec, int version, String segmentID) throws IOException {
+  public static void writeSegmentHeader(DataOutput out, String codec, int version, byte[] segmentID) throws IOException {
+    if (segmentID.length != StringHelper.ID_LENGTH) {
+      throw new IllegalArgumentException("Invalid id: " + StringHelper.idToString(segmentID));
+    }
     writeHeader(out, codec, version);
-    // nocommit: improve encoding of this ID
-    out.writeString(segmentID);
+    out.writeBytes(segmentID, 0, segmentID.length);
   }
 
   /**
@@ -132,6 +136,17 @@ public final class CodecUtil {
   public static int headerLength(String codec) {
     return 9+codec.length();
   }
+  
+  /**
+   * Computes the length of a segment header.
+   * 
+   * @param codec Codec name.
+   * @return length of the entire segment header.
+   * @see #writeSegmentHeader(DataOutput, String, int, byte[])
+   */
+  public static int segmentHeaderLength(String codec) {
+    return headerLength(codec) + StringHelper.ID_LENGTH;
+  }
 
   /**
    * Reads and validates a header previously written with 
@@ -192,7 +207,7 @@ public final class CodecUtil {
   
   /**
    * Reads and validates a header previously written with 
-   * {@link #writeSegmentHeader(DataOutput, String, int, String)}.
+   * {@link #writeSegmentHeader(DataOutput, String, int, byte[])}.
    * <p>
    * When reading a file, supply the expected <code>codec</code>,
    * expected version range (<code>minVersion to maxVersion</code>),
@@ -219,13 +234,15 @@ public final class CodecUtil {
    * @throws IndexFormatTooNewException If the actual version is greater 
    *         than <code>maxVersion</code>.
    * @throws IOException If there is an I/O error reading from the underlying medium.
-   * @see #writeSegmentHeader(DataOutput, String, int, String)
+   * @see #writeSegmentHeader(DataOutput, String, int, byte[])
    */
-  public static int checkSegmentHeader(DataInput in, String codec, int minVersion, int maxVersion, String segmentID) throws IOException {
+  public static int checkSegmentHeader(DataInput in, String codec, int minVersion, int maxVersion, byte[] segmentID) throws IOException {
     int version = checkHeader(in, codec, minVersion, maxVersion);
-    String id = in.readString();
-    if (!id.equals(segmentID)) {
-      throw new CorruptIndexException("file mismatch, expected segment id=" + segmentID + ", got=" + id, in);
+    byte id[] = new byte[StringHelper.ID_LENGTH];
+    in.readBytes(id, 0, id.length);
+    if (!Arrays.equals(id, segmentID)) {
+      throw new CorruptIndexException("file mismatch, expected segment id=" + StringHelper.idToString(segmentID) 
+                                                                 + ", got=" + StringHelper.idToString(id), in);
     }
     return version;
   }

Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoReader.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoReader.java Fri Sep 26 05:20:43 2014
@@ -30,6 +30,7 @@ import org.apache.lucene.index.SegmentIn
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.Version;
 
 /**
@@ -69,7 +70,8 @@ public class Lucene50SegmentInfoReader e
         final Map<String,String> diagnostics = input.readStringStringMap();
         final Set<String> files = input.readStringSet();
         
-        String id = input.readString();
+        byte[] id = new byte[StringHelper.ID_LENGTH];
+        input.readBytes(id, 0, id.length);
         
         si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, id);
         si.setFiles(files);

Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoWriter.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoWriter.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoWriter.java Fri Sep 26 05:20:43 2014
@@ -64,7 +64,8 @@ public class Lucene50SegmentInfoWriter e
       output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
       output.writeStringStringMap(si.getDiagnostics());
       output.writeStringSet(si.files());
-      output.writeString(si.getId());
+      byte[] id = si.getId();
+      output.writeBytes(id, 0, id.length);
       CodecUtil.writeFooter(output);
       success = true;
     } finally {

Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Fri Sep 26 05:20:43 2014
@@ -47,6 +47,7 @@ import org.apache.lucene.util.CommandLin
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LongBitSet;
+import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.Version;
 
 
@@ -514,7 +515,7 @@ public class CheckIndex {
     }
 
     msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments
-        + " " + versionString + " id=" + sis.getId() + " format=" + sFormat + userDataString);
+        + " " + versionString + " id=" + StringHelper.idToString(sis.getId()) + " format=" + sFormat + userDataString);
 
     if (onlySegments != null) {
       result.partial = true;
@@ -565,7 +566,7 @@ public class CheckIndex {
 
       try {
         msg(infoStream, "    version=" + (version == null ? "3.0" : version));
-        msg(infoStream, "    id=" + info.info.getId());
+        msg(infoStream, "    id=" + StringHelper.idToString(info.info.getId()));
         final Codec codec = info.info.getCodec();
         msg(infoStream, "    codec=" + codec);
         segInfoStat.codec = codec;

Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java Fri Sep 26 05:20:43 2014
@@ -18,6 +18,7 @@ package org.apache.lucene.index;
  */
 
 
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
@@ -59,7 +60,7 @@ public final class SegmentInfo {
   private boolean isCompoundFile;
 
   /** Id that uniquely identifies this segment. */
-  private final String id;
+  private final byte[] id;
 
   private Codec codec;
 
@@ -89,7 +90,7 @@ public final class SegmentInfo {
    */
   public SegmentInfo(Directory dir, Version version, String name, int docCount,
                      boolean isCompoundFile, Codec codec, Map<String,String> diagnostics,
-                     String id) {
+                     byte[] id) {
     assert !(dir instanceof TrackingDirectoryWrapper);
     this.dir = dir;
     this.version = version;
@@ -99,6 +100,9 @@ public final class SegmentInfo {
     this.codec = codec;
     this.diagnostics = diagnostics;
     this.id = id;
+    if (id != null && id.length != StringHelper.ID_LENGTH) {
+      throw new IllegalArgumentException("invalid id: " + Arrays.toString(id));
+    }
   }
 
   /**
@@ -218,8 +222,8 @@ public final class SegmentInfo {
   }
 
   /** Return the id that uniquely identifies this segment. */
-  public String getId() {
-    return id;
+  public byte[] getId() {
+    return id == null ? null : id.clone();
   }
 
   private Set<String> setFiles;

Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java Fri Sep 26 05:20:43 2014
@@ -125,8 +125,8 @@ public final class SegmentInfos implemen
   /** The file format version for the segments_N codec header, since 4.9+ */
   public static final int VERSION_49 = 3;
 
-  /** The file format version for the segments_N codec header, since 4.11+ */
-  public static final int VERSION_411 = 4;
+  /** The file format version for the segments_N codec header, since 5.0+ */
+  public static final int VERSION_50 = 4;
 
   /** Used to name new segments. */
   // TODO: should this be a long ...?
@@ -151,8 +151,8 @@ public final class SegmentInfos implemen
    */
   private static PrintStream infoStream = null;
 
-  /** Id for this commit; only written starting with Lucene 4.11 */
-  private String id;
+  /** Id for this commit; only written starting with Lucene 5.0 */
+  private byte[] id;
 
   /** Sole constructor. Typically you call this and then
    *  use {@link #read(Directory) or
@@ -262,10 +262,10 @@ public final class SegmentInfos implemen
                                                  nextGeneration);
   }
 
-  /** Since Lucene 4.11, every commit (segments_N) writes a unique id.  This will
-   *  return that id, or null if this commit was pre-4.11. */
-  public String getId() {
-    return id;
+  /** Since Lucene 5.0, every commit (segments_N) writes a unique id.  This will
+   *  return that id, or null if this commit was pre-5.0. */
+  public byte[] getId() {
+    return id == null ? null : id.clone();
   }
 
   /**
@@ -296,7 +296,7 @@ public final class SegmentInfos implemen
         throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
       }
       // 4.0+
-      int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_411);
+      int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_50);
       version = input.readLong();
       counter = input.readInt();
       int numSegments = input.readInt();
@@ -361,8 +361,9 @@ public final class SegmentInfos implemen
         add(siPerCommit);
       }
       userData = input.readStringStringMap();
-      if (format >= VERSION_411) {
-        id = input.readString();
+      if (format >= VERSION_50) {
+        id = new byte[StringHelper.ID_LENGTH];
+        input.readBytes(id, 0, id.length);
       }
 
       if (format >= VERSION_48) {
@@ -425,7 +426,7 @@ public final class SegmentInfos implemen
 
     try {
       segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
-      CodecUtil.writeHeader(segnOutput, "segments", VERSION_411);
+      CodecUtil.writeHeader(segnOutput, "segments", VERSION_50);
       segnOutput.writeLong(version); 
       segnOutput.writeInt(counter); // write counter
       segnOutput.writeInt(size()); // write infos
@@ -451,7 +452,8 @@ public final class SegmentInfos implemen
         assert si.dir == directory;
       }
       segnOutput.writeStringStringMap(userData);
-      segnOutput.writeString(StringHelper.randomId());
+      byte[] id = StringHelper.randomId();
+      segnOutput.writeBytes(id, 0, id.length);
       CodecUtil.writeFooter(segnOutput);
       segnOutput.close();
       directory.sync(Collections.singleton(segmentFileName));

Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/util/StringHelper.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/util/StringHelper.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/util/StringHelper.java Fri Sep 26 05:20:43 2014
@@ -232,21 +232,21 @@ public abstract class StringHelper {
 
   // Holds 128 bit unsigned value:
   private static BigInteger nextId;
-  private static final BigInteger idMask;
+  private static final BigInteger mask128;
   private static final Object idLock = new Object();
-  private static final String idPad = "00000000000000000000000000000000";
 
   static {
-    byte[] maskBytes = new byte[16];
-    Arrays.fill(maskBytes, (byte) 0xff);
-    idMask = new BigInteger(maskBytes);
+    // 128 bit unsigned mask
+    byte[] maskBytes128 = new byte[16];
+    Arrays.fill(maskBytes128, (byte) 0xff);
+    mask128 = new BigInteger(1, maskBytes128);
+    
     String prop = System.getProperty("tests.seed");
 
     // State for xorshift128:
     long x0;
     long x1;
 
-    long seed;
     if (prop != null) {
       // So if there is a test failure that somehow relied on this id,
       // we remain reproducible based on the test seed:
@@ -280,17 +280,25 @@ public abstract class StringHelper {
       s1 ^= s1 << 23; // a
       x1 = s1 ^ s0 ^ (s1 >>> 17) ^ (s0 >>> 26); // b, c
     }
+    
+    // 64-bit unsigned mask
+    byte[] maskBytes64 = new byte[8];
+    Arrays.fill(maskBytes64, (byte) 0xff);
+    BigInteger mask64 = new BigInteger(1, maskBytes64);
 
     // First make unsigned versions of x0, x1:
-    BigInteger unsignedX0 = new BigInteger(1, BigInteger.valueOf(x0).toByteArray());
-    BigInteger unsignedX1 = new BigInteger(1, BigInteger.valueOf(x1).toByteArray());
+    BigInteger unsignedX0 = BigInteger.valueOf(x0).and(mask64);
+    BigInteger unsignedX1 = BigInteger.valueOf(x1).and(mask64);
 
     // Concatenate bits of x0 and x1, as unsigned 128 bit integer:
     nextId = unsignedX0.shiftLeft(64).or(unsignedX1);
   }
+  
+  /** length in bytes of an ID */
+  public static final int ID_LENGTH = 16;
 
   /** Generates a non-cryptographic globally unique id. */
-  public static String randomId() {
+  public static byte[] randomId() {
 
     // NOTE: we don't use Java's UUID.randomUUID() implementation here because:
     //
@@ -306,15 +314,42 @@ public abstract class StringHelper {
     //     what impact that has on the period, whereas the simple ++ (mod 2^128)
     //     we use here is guaranteed to have the full period.
 
-    String id;
+    byte bits[];
     synchronized(idLock) {
-      id = nextId.toString(16);
-      nextId = nextId.add(BigInteger.ONE).and(idMask);
+      bits = nextId.toByteArray();
+      nextId = nextId.add(BigInteger.ONE).and(mask128);
+    }
+    
+    // toByteArray() always returns a sign bit, so it may require an extra byte (always zero)
+    if (bits.length > ID_LENGTH) {
+      assert bits.length == ID_LENGTH + 1;
+      assert bits[0] == 0;
+      return Arrays.copyOfRange(bits, 1, bits.length);
+    } else {
+      byte[] result = new byte[ID_LENGTH];
+      System.arraycopy(bits, 0, result, result.length - bits.length, bits.length);
+      return result;
+    }
+  }
+  
+  /** 
+   * Helper method to render an ID as a string, for debugging
+   * <p>
+   * Returns the string {@code (null)} if the id is null.
+   * Otherwise, returns a string representation for debugging.
+   * Never throws an exception. The returned string may
+   * indicate if the id is definitely invalid.
+   */
+  public static String idToString(byte id[]) {
+    if (id == null) {
+      return "(null)";
+    } else {
+      StringBuilder sb = new StringBuilder();
+      sb.append(new BigInteger(1, id).toString(Character.MAX_RADIX));
+      if (id.length != ID_LENGTH) {
+        sb.append(" (INVALID FORMAT)");
+      }
+      return sb.toString();
     }
-
-    assert id.length() <= 32: "id=" + id;
-    id = idPad.substring(id.length()) + id;
-
-    return id;
   }
 }

Modified: lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java Fri Sep 26 05:20:43 2014
@@ -2767,11 +2767,13 @@ public class TestIndexWriter extends Luc
     
     SegmentInfos sis = new SegmentInfos();
     sis.read(d);
-    String id1 = sis.getId();
+    byte[] id1 = sis.getId();
     assertNotNull(id1);
+    assertEquals(StringHelper.ID_LENGTH, id1.length);
     
-    String id2 = sis.info(0).info.getId();
+    byte[] id2 = sis.info(0).info.getId();
     assertNotNull(id2);
+    assertEquals(StringHelper.ID_LENGTH, id2.length);
 
     // Make sure CheckIndex includes id output:
     ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
@@ -2784,14 +2786,14 @@ public class TestIndexWriter extends Luc
     assertTrue(s, indexStatus != null && indexStatus.clean);
 
     // Commit id is always stored:
-    assertTrue("missing id=" + id1 + " in:\n" + s, s.contains("id=" + id1));
+    assertTrue("missing id=" + StringHelper.idToString(id1) + " in:\n" + s, s.contains("id=" + StringHelper.idToString(id1)));
 
-    assertTrue("missing id=" + id2 + " in:\n" + s, s.contains("id=" + id2));
+    assertTrue("missing id=" + StringHelper.idToString(id2) + " in:\n" + s, s.contains("id=" + StringHelper.idToString(id2)));
     d.close();
 
     Set<String> ids = new HashSet<>();
     for(int i=0;i<100000;i++) {
-      String id = StringHelper.randomId();
+      String id = StringHelper.idToString(StringHelper.randomId());
       assertFalse("id=" + id + " i=" + i, ids.contains(id));
       ids.add(id);
     }