You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/09/26 07:20:44 UTC
svn commit: r1627714 - in /lucene/dev/branches/lucene5969/lucene:
backward-codecs/src/java/org/apache/lucene/codecs/lucene46/
backward-codecs/src/test/org/apache/lucene/codecs/lucene46/
codecs/src/java/org/apache/lucene/codecs/simpletext/ core/src/java...
Author: rmuir
Date: Fri Sep 26 05:20:43 2014
New Revision: 1627714
URL: http://svn.apache.org/r1627714
Log:
LUCENE-5969, LUCENE-5895: fix sign bit bugs in segment/commit IDs, use byte[] representation
Modified:
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java
lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java
lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoReader.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoWriter.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java Fri Sep 26 05:20:43 2014
@@ -31,7 +31,7 @@ import org.apache.lucene.store.DataOutpu
* <p>
* Files:
* <ul>
- * <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Id, Footer
+ * <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Footer
* </ul>
* </p>
* Data types:
@@ -44,7 +44,6 @@ import org.apache.lucene.store.DataOutpu
* <li>Diagnostics --> {@link DataOutput#writeStringStringMap Map<String,String>}</li>
* <li>IsCompoundFile --> {@link DataOutput#writeByte Int8}</li>
* <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
- * <li>Id --> {@link DataOutput#writeString String}</li>
* </ul>
* </p>
* Field Descriptions:
@@ -88,6 +87,5 @@ public class Lucene46SegmentInfoFormat e
static final String CODEC_NAME = "Lucene46SegmentInfo";
static final int VERSION_START = 0;
static final int VERSION_CHECKSUM = 1;
- static final int VERSION_ID = 2;
- static final int VERSION_CURRENT = VERSION_ID;
+ static final int VERSION_CURRENT = VERSION_CHECKSUM;
}
Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java Fri Sep 26 05:20:43 2014
@@ -65,13 +65,6 @@ public class Lucene46SegmentInfoReader e
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
final Map<String,String> diagnostics = input.readStringStringMap();
final Set<String> files = input.readStringSet();
-
- String id;
- if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_ID) {
- id = input.readString();
- } else {
- id = null;
- }
if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) {
CodecUtil.checkFooter(input);
@@ -79,7 +72,7 @@ public class Lucene46SegmentInfoReader e
CodecUtil.checkEOF(input);
}
- final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, id);
+ final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, null);
si.setFiles(files);
return si;
Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java Fri Sep 26 05:20:43 2014
@@ -64,7 +64,6 @@ public class Lucene46SegmentInfoWriter e
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
output.writeStringStringMap(si.getDiagnostics());
output.writeStringSet(si.files());
- output.writeString(si.getId());
CodecUtil.writeFooter(output);
success = true;
} finally {
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java Fri Sep 26 05:20:43 2014
@@ -20,6 +20,7 @@ package org.apache.lucene.codecs.simplet
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
@@ -109,7 +110,7 @@ public class SimpleTextSegmentInfoReader
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_ID);
- final String id = readString(SI_ID.length, scratch);
+ final byte[] id = Arrays.copyOfRange(scratch.bytes(), SI_ID.length, scratch.length());
SimpleTextUtil.checkFooter(input);
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java Fri Sep 26 05:20:43 2014
@@ -107,7 +107,7 @@ public class SimpleTextSegmentInfoWriter
}
SimpleTextUtil.write(output, SI_ID);
- SimpleTextUtil.write(output, si.getId(), scratch);
+ SimpleTextUtil.write(output, new BytesRef(si.getId()));
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.writeChecksum(output, scratch);
Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java Fri Sep 26 05:20:43 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
import java.io.IOException;
+import java.util.Arrays;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFormatTooNewException;
@@ -31,6 +32,7 @@ import org.apache.lucene.store.IndexInpu
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.StringHelper;
/**
* Utility class for reading and writing versioned headers.
@@ -94,12 +96,12 @@ public final class CodecUtil {
* Writes a codec header for a per-segment file, which records a string to
* identify the file, a version number, and the unique ID of the segment.
* This header can be parsed and validated with
- * {@link #checkSegmentHeader(DataInput, String, int, int, String) checkSegmentHeader()}.
+ * {@link #checkSegmentHeader(DataInput, String, int, int, byte[]) checkSegmentHeader()}.
* <p>
* CodecSegmentHeader --> CodecHeader,SegmentID
* <ul>
* <li>CodecHeader --> {@link #writeHeader}
- * <li>SegmentID --> {@link DataOutput#writeString String}.
+ * <li>SegmentID --> {@link DataOutput#writeByte byte}<sup>16</sup>.
* Unique identifier for the segment.
* </ul>
* <p>
@@ -113,13 +115,15 @@ public final class CodecUtil {
* @param segmentID Unique identifier for the segment
* @param version Version number
* @throws IOException If there is an I/O error writing to the underlying medium.
- * @throws IllegalArgumentException If the codec name is not simple ASCII, or is more than 127 characters in length
+ * @throws IllegalArgumentException If the codec name is not simple ASCII, or
+ * is more than 127 characters in length, or if segmentID is invalid.
*/
- // nocommit: fix javadocs, add segmentLength()
- public static void writeSegmentHeader(DataOutput out, String codec, int version, String segmentID) throws IOException {
+ public static void writeSegmentHeader(DataOutput out, String codec, int version, byte[] segmentID) throws IOException {
+ if (segmentID.length != StringHelper.ID_LENGTH) {
+ throw new IllegalArgumentException("Invalid id: " + StringHelper.idToString(segmentID));
+ }
writeHeader(out, codec, version);
- // nocommit: improve encoding of this ID
- out.writeString(segmentID);
+ out.writeBytes(segmentID, 0, segmentID.length);
}
/**
@@ -132,6 +136,17 @@ public final class CodecUtil {
public static int headerLength(String codec) {
return 9+codec.length();
}
+
+ /**
+ * Computes the length of a segment header.
+ *
+ * @param codec Codec name.
+ * @return length of the entire segment header.
+ * @see #writeSegmentHeader(DataOutput, String, int, byte[])
+ */
+ public static int segmentHeaderLength(String codec) {
+ return headerLength(codec) + StringHelper.ID_LENGTH;
+ }
/**
* Reads and validates a header previously written with
@@ -192,7 +207,7 @@ public final class CodecUtil {
/**
* Reads and validates a header previously written with
- * {@link #writeSegmentHeader(DataOutput, String, int, String)}.
+ * {@link #writeSegmentHeader(DataOutput, String, int, byte[])}.
* <p>
* When reading a file, supply the expected <code>codec</code>,
* expected version range (<code>minVersion to maxVersion</code>),
@@ -219,13 +234,15 @@ public final class CodecUtil {
* @throws IndexFormatTooNewException If the actual version is greater
* than <code>maxVersion</code>.
* @throws IOException If there is an I/O error reading from the underlying medium.
- * @see #writeSegmentHeader(DataOutput, String, int, String)
+ * @see #writeSegmentHeader(DataOutput, String, int, byte[])
*/
- public static int checkSegmentHeader(DataInput in, String codec, int minVersion, int maxVersion, String segmentID) throws IOException {
+ public static int checkSegmentHeader(DataInput in, String codec, int minVersion, int maxVersion, byte[] segmentID) throws IOException {
int version = checkHeader(in, codec, minVersion, maxVersion);
- String id = in.readString();
- if (!id.equals(segmentID)) {
- throw new CorruptIndexException("file mismatch, expected segment id=" + segmentID + ", got=" + id, in);
+ byte id[] = new byte[StringHelper.ID_LENGTH];
+ in.readBytes(id, 0, id.length);
+ if (!Arrays.equals(id, segmentID)) {
+ throw new CorruptIndexException("file mismatch, expected segment id=" + StringHelper.idToString(segmentID)
+ + ", got=" + StringHelper.idToString(id), in);
}
return version;
}
Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoReader.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoReader.java Fri Sep 26 05:20:43 2014
@@ -30,6 +30,7 @@ import org.apache.lucene.index.SegmentIn
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
/**
@@ -69,7 +70,8 @@ public class Lucene50SegmentInfoReader e
final Map<String,String> diagnostics = input.readStringStringMap();
final Set<String> files = input.readStringSet();
- String id = input.readString();
+ byte[] id = new byte[StringHelper.ID_LENGTH];
+ input.readBytes(id, 0, id.length);
si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, id);
si.setFiles(files);
Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoWriter.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoWriter.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoWriter.java Fri Sep 26 05:20:43 2014
@@ -64,7 +64,8 @@ public class Lucene50SegmentInfoWriter e
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
output.writeStringStringMap(si.getDiagnostics());
output.writeStringSet(si.files());
- output.writeString(si.getId());
+ byte[] id = si.getId();
+ output.writeBytes(id, 0, id.length);
CodecUtil.writeFooter(output);
success = true;
} finally {
Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Fri Sep 26 05:20:43 2014
@@ -47,6 +47,7 @@ import org.apache.lucene.util.CommandLin
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LongBitSet;
+import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
@@ -514,7 +515,7 @@ public class CheckIndex {
}
msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments
- + " " + versionString + " id=" + sis.getId() + " format=" + sFormat + userDataString);
+ + " " + versionString + " id=" + StringHelper.idToString(sis.getId()) + " format=" + sFormat + userDataString);
if (onlySegments != null) {
result.partial = true;
@@ -565,7 +566,7 @@ public class CheckIndex {
try {
msg(infoStream, " version=" + (version == null ? "3.0" : version));
- msg(infoStream, " id=" + info.info.getId());
+ msg(infoStream, " id=" + StringHelper.idToString(info.info.getId()));
final Codec codec = info.info.getCodec();
msg(infoStream, " codec=" + codec);
segInfoStat.codec = codec;
Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java Fri Sep 26 05:20:43 2014
@@ -18,6 +18,7 @@ package org.apache.lucene.index;
*/
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
@@ -59,7 +60,7 @@ public final class SegmentInfo {
private boolean isCompoundFile;
/** Id that uniquely identifies this segment. */
- private final String id;
+ private final byte[] id;
private Codec codec;
@@ -89,7 +90,7 @@ public final class SegmentInfo {
*/
public SegmentInfo(Directory dir, Version version, String name, int docCount,
boolean isCompoundFile, Codec codec, Map<String,String> diagnostics,
- String id) {
+ byte[] id) {
assert !(dir instanceof TrackingDirectoryWrapper);
this.dir = dir;
this.version = version;
@@ -99,6 +100,9 @@ public final class SegmentInfo {
this.codec = codec;
this.diagnostics = diagnostics;
this.id = id;
+ if (id != null && id.length != StringHelper.ID_LENGTH) {
+ throw new IllegalArgumentException("invalid id: " + Arrays.toString(id));
+ }
}
/**
@@ -218,8 +222,8 @@ public final class SegmentInfo {
}
/** Return the id that uniquely identifies this segment. */
- public String getId() {
- return id;
+ public byte[] getId() {
+ return id == null ? null : id.clone();
}
private Set<String> setFiles;
Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java Fri Sep 26 05:20:43 2014
@@ -125,8 +125,8 @@ public final class SegmentInfos implemen
/** The file format version for the segments_N codec header, since 4.9+ */
public static final int VERSION_49 = 3;
- /** The file format version for the segments_N codec header, since 4.11+ */
- public static final int VERSION_411 = 4;
+ /** The file format version for the segments_N codec header, since 5.0+ */
+ public static final int VERSION_50 = 4;
/** Used to name new segments. */
// TODO: should this be a long ...?
@@ -151,8 +151,8 @@ public final class SegmentInfos implemen
*/
private static PrintStream infoStream = null;
- /** Id for this commit; only written starting with Lucene 4.11 */
- private String id;
+ /** Id for this commit; only written starting with Lucene 5.0 */
+ private byte[] id;
/** Sole constructor. Typically you call this and then
* use {@link #read(Directory) or
@@ -262,10 +262,10 @@ public final class SegmentInfos implemen
nextGeneration);
}
- /** Since Lucene 4.11, every commit (segments_N) writes a unique id. This will
- * return that id, or null if this commit was pre-4.11. */
- public String getId() {
- return id;
+ /** Since Lucene 5.0, every commit (segments_N) writes a unique id. This will
+ * return that id, or null if this commit was pre-5.0. */
+ public byte[] getId() {
+ return id == null ? null : id.clone();
}
/**
@@ -296,7 +296,7 @@ public final class SegmentInfos implemen
throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
}
// 4.0+
- int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_411);
+ int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_50);
version = input.readLong();
counter = input.readInt();
int numSegments = input.readInt();
@@ -361,8 +361,9 @@ public final class SegmentInfos implemen
add(siPerCommit);
}
userData = input.readStringStringMap();
- if (format >= VERSION_411) {
- id = input.readString();
+ if (format >= VERSION_50) {
+ id = new byte[StringHelper.ID_LENGTH];
+ input.readBytes(id, 0, id.length);
}
if (format >= VERSION_48) {
@@ -425,7 +426,7 @@ public final class SegmentInfos implemen
try {
segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
- CodecUtil.writeHeader(segnOutput, "segments", VERSION_411);
+ CodecUtil.writeHeader(segnOutput, "segments", VERSION_50);
segnOutput.writeLong(version);
segnOutput.writeInt(counter); // write counter
segnOutput.writeInt(size()); // write infos
@@ -451,7 +452,8 @@ public final class SegmentInfos implemen
assert si.dir == directory;
}
segnOutput.writeStringStringMap(userData);
- segnOutput.writeString(StringHelper.randomId());
+ byte[] id = StringHelper.randomId();
+ segnOutput.writeBytes(id, 0, id.length);
CodecUtil.writeFooter(segnOutput);
segnOutput.close();
directory.sync(Collections.singleton(segmentFileName));
Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/util/StringHelper.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/util/StringHelper.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/util/StringHelper.java Fri Sep 26 05:20:43 2014
@@ -232,21 +232,21 @@ public abstract class StringHelper {
// Holds 128 bit unsigned value:
private static BigInteger nextId;
- private static final BigInteger idMask;
+ private static final BigInteger mask128;
private static final Object idLock = new Object();
- private static final String idPad = "00000000000000000000000000000000";
static {
- byte[] maskBytes = new byte[16];
- Arrays.fill(maskBytes, (byte) 0xff);
- idMask = new BigInteger(maskBytes);
+ // 128 bit unsigned mask
+ byte[] maskBytes128 = new byte[16];
+ Arrays.fill(maskBytes128, (byte) 0xff);
+ mask128 = new BigInteger(1, maskBytes128);
+
String prop = System.getProperty("tests.seed");
// State for xorshift128:
long x0;
long x1;
- long seed;
if (prop != null) {
// So if there is a test failure that somehow relied on this id,
// we remain reproducible based on the test seed:
@@ -280,17 +280,25 @@ public abstract class StringHelper {
s1 ^= s1 << 23; // a
x1 = s1 ^ s0 ^ (s1 >>> 17) ^ (s0 >>> 26); // b, c
}
+
+ // 64-bit unsigned mask
+ byte[] maskBytes64 = new byte[8];
+ Arrays.fill(maskBytes64, (byte) 0xff);
+ BigInteger mask64 = new BigInteger(1, maskBytes64);
// First make unsigned versions of x0, x1:
- BigInteger unsignedX0 = new BigInteger(1, BigInteger.valueOf(x0).toByteArray());
- BigInteger unsignedX1 = new BigInteger(1, BigInteger.valueOf(x1).toByteArray());
+ BigInteger unsignedX0 = BigInteger.valueOf(x0).and(mask64);
+ BigInteger unsignedX1 = BigInteger.valueOf(x1).and(mask64);
// Concatenate bits of x0 and x1, as unsigned 128 bit integer:
nextId = unsignedX0.shiftLeft(64).or(unsignedX1);
}
+
+ /** length in bytes of an ID */
+ public static final int ID_LENGTH = 16;
/** Generates a non-cryptographic globally unique id. */
- public static String randomId() {
+ public static byte[] randomId() {
// NOTE: we don't use Java's UUID.randomUUID() implementation here because:
//
@@ -306,15 +314,42 @@ public abstract class StringHelper {
// what impact that has on the period, whereas the simple ++ (mod 2^128)
// we use here is guaranteed to have the full period.
- String id;
+ byte bits[];
synchronized(idLock) {
- id = nextId.toString(16);
- nextId = nextId.add(BigInteger.ONE).and(idMask);
+ bits = nextId.toByteArray();
+ nextId = nextId.add(BigInteger.ONE).and(mask128);
+ }
+
+ // toByteArray() always returns a sign bit, so it may require an extra byte (always zero)
+ if (bits.length > ID_LENGTH) {
+ assert bits.length == ID_LENGTH + 1;
+ assert bits[0] == 0;
+ return Arrays.copyOfRange(bits, 1, bits.length);
+ } else {
+ byte[] result = new byte[ID_LENGTH];
+ System.arraycopy(bits, 0, result, result.length - bits.length, bits.length);
+ return result;
+ }
+ }
+
+ /**
+ * Helper method to render an ID as a string, for debugging
+ * <p>
+ * Returns the string {@code (null)} if the id is null.
+ * Otherwise, returns a string representation for debugging.
+ * Never throws an exception. The returned string may
+ * indicate if the id is definitely invalid.
+ */
+ public static String idToString(byte id[]) {
+ if (id == null) {
+ return "(null)";
+ } else {
+ StringBuilder sb = new StringBuilder();
+ sb.append(new BigInteger(1, id).toString(Character.MAX_RADIX));
+ if (id.length != ID_LENGTH) {
+ sb.append(" (INVALID FORMAT)");
+ }
+ return sb.toString();
}
-
- assert id.length() <= 32: "id=" + id;
- id = idPad.substring(id.length()) + id;
-
- return id;
}
}
Modified: lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1627714&r1=1627713&r2=1627714&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java Fri Sep 26 05:20:43 2014
@@ -2767,11 +2767,13 @@ public class TestIndexWriter extends Luc
SegmentInfos sis = new SegmentInfos();
sis.read(d);
- String id1 = sis.getId();
+ byte[] id1 = sis.getId();
assertNotNull(id1);
+ assertEquals(StringHelper.ID_LENGTH, id1.length);
- String id2 = sis.info(0).info.getId();
+ byte[] id2 = sis.info(0).info.getId();
assertNotNull(id2);
+ assertEquals(StringHelper.ID_LENGTH, id2.length);
// Make sure CheckIndex includes id output:
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
@@ -2784,14 +2786,14 @@ public class TestIndexWriter extends Luc
assertTrue(s, indexStatus != null && indexStatus.clean);
// Commit id is always stored:
- assertTrue("missing id=" + id1 + " in:\n" + s, s.contains("id=" + id1));
+ assertTrue("missing id=" + StringHelper.idToString(id1) + " in:\n" + s, s.contains("id=" + StringHelper.idToString(id1)));
- assertTrue("missing id=" + id2 + " in:\n" + s, s.contains("id=" + id2));
+ assertTrue("missing id=" + StringHelper.idToString(id1) + " in:\n" + s, s.contains("id=" + StringHelper.idToString(id1)));
d.close();
Set<String> ids = new HashSet<>();
for(int i=0;i<100000;i++) {
- String id = StringHelper.randomId();
+ String id = StringHelper.idToString(StringHelper.randomId());
assertFalse("id=" + id + " i=" + i, ids.contains(id));
ids.add(id);
}