You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/05/13 13:18:25 UTC
svn commit: r1102677 [3/6] - in /lucene/dev/branches/docvalues: ./
dev-tools/eclipse/ dev-tools/maven/
dev-tools/maven/solr/contrib/dataimporthandler/src/extras/
dev-tools/maven/solr/src/ dev-tools/maven/solr/src/solrj/
dev-tools/scripts/ lucene/ lucen...
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Fri May 13 11:18:19 2011
@@ -20,7 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Comparator;
import java.util.List;
import java.util.Set;
@@ -595,7 +594,7 @@ public abstract class LogMergePolicy ext
} else if (!anyTooLarge) {
if (spec == null)
spec = new MergeSpecification();
- final SegmentInfos mergeInfos = new SegmentInfos();
+ final List<SegmentInfo> mergeInfos = new ArrayList<SegmentInfo>();
for(int i=start;i<end;i++) {
mergeInfos.add(levels.get(i).info);
assert infos.contains(levels.get(i).info);
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergeDocIDRemapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergeDocIDRemapper.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergeDocIDRemapper.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergeDocIDRemapper.java Fri May 13 11:18:19 2011
@@ -32,7 +32,7 @@ final class MergeDocIDRemapper {
public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount) {
this.docMaps = docMaps;
- SegmentInfo firstSegment = merge.segments.info(0);
+ SegmentInfo firstSegment = merge.segments.get(0);
int i = 0;
while(true) {
SegmentInfo info = infos.info(i);
@@ -45,7 +45,7 @@ final class MergeDocIDRemapper {
int numDocs = 0;
for(int j=0;j<docMaps.length;i++,j++) {
numDocs += infos.info(i).docCount;
- assert infos.info(i).equals(merge.segments.info(j));
+ assert infos.info(i).equals(merge.segments.get(j));
}
maxDocID = minDocID + numDocs;
@@ -55,7 +55,7 @@ final class MergeDocIDRemapper {
starts[0] = minDocID;
newStarts[0] = minDocID;
for(i=1;i<docMaps.length;i++) {
- final int lastDocCount = merge.segments.info(i-1).docCount;
+ final int lastDocCount = merge.segments.get(i-1).docCount;
starts[i] = starts[i-1] + lastDocCount;
newStarts[i] = newStarts[i-1] + lastDocCount - delCounts[i-1];
}
@@ -69,7 +69,7 @@ final class MergeDocIDRemapper {
// assert docShift > 0;
// Make sure it all adds up:
- assert docShift == maxDocID - (newStarts[docMaps.length-1] + merge.segments.info(docMaps.length-1).docCount - delCounts[docMaps.length-1]);
+ assert docShift == maxDocID - (newStarts[docMaps.length-1] + merge.segments.get(docMaps.length-1).docCount - delCounts[docMaps.length-1]);
}
public int remap(int oldDocID) {
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java Fri May 13 11:18:19 2011
@@ -75,15 +75,21 @@ public abstract class MergePolicy implem
long estimatedMergeBytes; // used by IndexWriter
List<SegmentReader> readers; // used by IndexWriter
List<SegmentReader> readerClones; // used by IndexWriter
- public final SegmentInfos segments;
+ public final List<SegmentInfo> segments;
+ public final int totalDocCount;
boolean aborted;
Throwable error;
boolean paused;
- public OneMerge(SegmentInfos segments) {
+ public OneMerge(List<SegmentInfo> segments) {
if (0 == segments.size())
throw new RuntimeException("segments must include at least one segment");
this.segments = segments;
+ int count = 0;
+ for(SegmentInfo info : segments) {
+ count += info.docCount;
+ }
+ totalDocCount = count;
}
/** Record that an exception occurred while executing
@@ -147,7 +153,7 @@ public abstract class MergePolicy implem
final int numSegments = segments.size();
for(int i=0;i<numSegments;i++) {
if (i > 0) b.append(' ');
- b.append(segments.info(i).toString(dir, 0));
+ b.append(segments.get(i).toString(dir, 0));
}
if (info != null)
b.append(" into ").append(info.name);
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Fri May 13 11:18:19 2011
@@ -43,7 +43,8 @@ import org.apache.lucene.util.Constants;
* @lucene.experimental
*/
public final class SegmentInfo {
-
+ // TODO: remove with hasVector and hasProx
+ private static final int CHECK_FIELDINFO = -2;
static final int NO = -1; // e.g. no norms; no deletes;
static final int YES = 1; // e.g. have norms; have deletes;
static final int WITHOUT_GEN = 0; // a file name that has no GEN in it.
@@ -85,10 +86,12 @@ public final class SegmentInfo {
private boolean docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx)
private int delCount; // How many deleted docs in this segment
+
+ //TODO: remove when we don't have to support old indexes anymore that had this field
+ private int hasVectors = CHECK_FIELDINFO;
+ //TODO: remove when we don't have to support old indexes anymore that had this field
+ private int hasProx = CHECK_FIELDINFO; // True if this segment has any fields with omitTermFreqAndPositions==false
- private boolean hasProx; // True if this segment has any fields with omitTermFreqAndPositions==false
-
- private boolean hasVectors; // True if this segment wrote term vectors
private FieldInfos fieldInfos;
@@ -106,9 +109,12 @@ public final class SegmentInfo {
// NOTE: only used in-RAM by IW to track buffered deletes;
// this is never written to/read from the Directory
private long bufferedDeletesGen;
-
+
+ // holds the fieldInfos Version to refresh files() cache if FI has changed
+ private long fieldInfosVersion;
+
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile,
- boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors, FieldInfos fieldInfos) {
+ SegmentCodecs segmentCodecs, FieldInfos fieldInfos) {
this.name = name;
this.docCount = docCount;
this.dir = dir;
@@ -116,9 +122,7 @@ public final class SegmentInfo {
this.isCompoundFile = isCompoundFile;
this.docStoreOffset = -1;
this.docStoreSegment = name;
- this.hasProx = hasProx;
this.segmentCodecs = segmentCodecs;
- this.hasVectors = hasVectors;
delCount = 0;
version = Constants.LUCENE_MAIN_VERSION;
this.fieldInfos = fieldInfos;
@@ -213,7 +217,7 @@ public final class SegmentInfo {
delCount = input.readInt();
assert delCount <= docCount;
- hasProx = input.readByte() == YES;
+ hasProx = input.readByte();
// System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);
if (format <= DefaultSegmentInfosWriter.FORMAT_4_0) {
@@ -226,7 +230,7 @@ public final class SegmentInfo {
diagnostics = input.readStringStringMap();
if (format <= DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
- hasVectors = input.readByte() == 1;
+ hasVectors = input.readByte();
} else {
final String storesSegment;
final String ext;
@@ -247,7 +251,7 @@ public final class SegmentInfo {
dirToTest = dir;
}
try {
- hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
+ hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION)) ? YES : NO;
} finally {
if (isCompoundFile) {
dirToTest.close();
@@ -311,14 +315,9 @@ public final class SegmentInfo {
}
public boolean getHasVectors() throws IOException {
- return hasVectors;
- }
-
- public void setHasVectors(boolean v) {
- hasVectors = v;
- clearFilesCache();
+ return hasVectors == CHECK_FIELDINFO ? getFieldInfos().hasVectors() : hasVectors == YES;
}
-
+
public FieldInfos getFieldInfos() throws IOException {
loadFieldInfos(dir, true);
return fieldInfos;
@@ -349,7 +348,7 @@ public final class SegmentInfo {
@Override
public Object clone() {
- final SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, hasProx, segmentCodecs, hasVectors,
+ final SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, segmentCodecs,
fieldInfos == null ? null : (FieldInfos) fieldInfos.clone());
si.docStoreOffset = docStoreOffset;
si.docStoreSegment = docStoreSegment;
@@ -364,6 +363,8 @@ public final class SegmentInfo {
}
}
si.version = version;
+ si.hasProx = hasProx;
+ si.hasVectors = hasVectors;
return si;
}
@@ -569,19 +570,14 @@ public final class SegmentInfo {
output.writeByte((byte) (isCompoundFile ? YES : NO));
output.writeInt(delCount);
- output.writeByte((byte) (hasProx ? 1:0));
+ output.writeByte((byte) (hasProx));
segmentCodecs.write(output);
output.writeStringStringMap(diagnostics);
- output.writeByte((byte) (hasVectors ? 1 : 0));
- }
-
- void setHasProx(boolean hasProx) {
- this.hasProx = hasProx;
- clearFilesCache();
+ output.writeByte((byte) (hasVectors));
}
- public boolean getHasProx() {
- return hasProx;
+ public boolean getHasProx() throws IOException {
+ return hasProx == CHECK_FIELDINFO ? getFieldInfos().hasProx() : hasProx == YES;
}
/** Can only be called once. */
@@ -609,13 +605,14 @@ public final class SegmentInfo {
*/
public List<String> files() throws IOException {
-
- if (files != null) {
+ final long fisVersion = fieldInfosVersion;
+ if (fisVersion != (fieldInfosVersion = getFieldInfos().getVersion())) {
+ clearFilesCache(); // FIS has modifications - need to recompute
+ } else if (files != null) {
// Already cached:
return files;
}
-
- Set<String> fileSet = new HashSet<String>();
+ final Set<String> fileSet = new HashSet<String>();
boolean useCompoundFile = getUseCompoundFile();
@@ -637,7 +634,7 @@ public final class SegmentInfo {
} else {
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.FIELDS_INDEX_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.FIELDS_EXTENSION));
- if (hasVectors) {
+ if (getHasVectors()) {
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
@@ -646,7 +643,7 @@ public final class SegmentInfo {
} else if (!useCompoundFile) {
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.FIELDS_INDEX_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.FIELDS_EXTENSION));
- if (hasVectors) {
+ if (getHasVectors()) {
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
@@ -709,8 +706,12 @@ public final class SegmentInfo {
if (this.dir != dir) {
s.append('x');
}
- if (hasVectors) {
- s.append('v');
+ try {
+ if (getHasVectors()) {
+ s.append('v');
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
}
s.append(docCount);
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Fri May 13 11:18:19 2011
@@ -72,7 +72,7 @@ final class SegmentMerger {
private PayloadProcessorProvider payloadProcessorProvider;
- SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, CodecProvider codecs, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
+ SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
this.payloadProcessorProvider = payloadProcessorProvider;
directory = dir;
segment = name;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java Fri May 13 11:18:19 2011
@@ -32,7 +32,6 @@ public class SegmentWriteState {
public final String segmentName;
public final FieldInfos fieldInfos;
public final int numDocs;
- public boolean hasVectors;
// Deletes to apply while we are flushing the segment. A
// Term is enrolled in here if it was deleted at one
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java Fri May 13 11:18:19 2011
@@ -63,7 +63,6 @@ final class TermVectorsTermsWriter exten
}
lastDocID = 0;
- state.hasVectors = hasVectors;
hasVectors = false;
}
@@ -121,8 +120,7 @@ final class TermVectorsTermsWriter exten
fill(docState.docID);
// Append term vectors to the real outputs:
- long pointer = tvd.getFilePointer();
- tvx.writeLong(pointer);
+ tvx.writeLong(tvd.getFilePointer());
tvx.writeLong(tvf.getFilePointer());
tvd.writeVInt(numVectorFields);
if (numVectorFields > 0) {
@@ -136,6 +134,8 @@ final class TermVectorsTermsWriter exten
tvd.writeVLong(pos-lastPos);
lastPos = pos;
perFields[i].finishDocument();
+ // commit the termVectors once the document is successful - FI will otherwise reset them
+ perFields[i].fieldInfo.commitVectors();
}
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java Fri May 13 11:18:19 2011
@@ -23,6 +23,8 @@ import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Comparator;
+import java.util.List;
+import java.util.ArrayList;
/**
* Merges segments of approximately equal size, subject to
@@ -249,7 +251,7 @@ public class TieredMergePolicy extends M
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>();
- final SegmentInfos infosSorted = new SegmentInfos();
+ final List<SegmentInfo> infosSorted = new ArrayList<SegmentInfo>();
infosSorted.addAll(infos);
Collections.sort(infosSorted, segmentByteSizeDescending);
@@ -277,7 +279,7 @@ public class TieredMergePolicy extends M
// If we have too-large segments, grace them out
// of the maxSegmentCount:
int tooBigCount = 0;
- while (tooBigCount < infosSorted.size() && size(infosSorted.info(tooBigCount)) >= maxMergedSegmentBytes/2.0) {
+ while (tooBigCount < infosSorted.size() && size(infosSorted.get(tooBigCount)) >= maxMergedSegmentBytes/2.0) {
totIndexBytes -= size(infosSorted.get(tooBigCount));
tooBigCount++;
}
@@ -310,7 +312,7 @@ public class TieredMergePolicy extends M
// Gather eligible segments for merging, ie segments
// not already being merged and not already picked (by
// prior iteration of this loop) for merging:
- final SegmentInfos eligible = new SegmentInfos();
+ final List<SegmentInfo> eligible = new ArrayList<SegmentInfo>();
for(int idx = tooBigCount; idx<infosSorted.size(); idx++) {
final SegmentInfo info = infosSorted.get(idx);
if (merging.contains(info)) {
@@ -332,7 +334,7 @@ public class TieredMergePolicy extends M
// OK we are over budget -- find best merge!
MergeScore bestScore = null;
- SegmentInfos best = null;
+ List<SegmentInfo> best = null;
boolean bestTooLarge = false;
long bestMergeBytes = 0;
@@ -341,10 +343,10 @@ public class TieredMergePolicy extends M
long totAfterMergeBytes = 0;
- final SegmentInfos candidate = new SegmentInfos();
+ final List<SegmentInfo> candidate = new ArrayList<SegmentInfo>();
boolean hitTooLarge = false;
for(int idx = startIdx;idx<eligible.size() && candidate.size() < maxMergeAtOnce;idx++) {
- final SegmentInfo info = eligible.info(idx);
+ final SegmentInfo info = eligible.get(idx);
final long segBytes = size(info);
if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) {
@@ -398,7 +400,7 @@ public class TieredMergePolicy extends M
}
/** Expert: scores one merge; subclasses can override. */
- protected MergeScore score(SegmentInfos candidate, boolean hitTooLarge, long mergingBytes) throws IOException {
+ protected MergeScore score(List<SegmentInfo> candidate, boolean hitTooLarge, long mergingBytes) throws IOException {
long totBeforeMergeBytes = 0;
long totAfterMergeBytes = 0;
long totAfterMergeBytesFloored = 0;
@@ -420,7 +422,7 @@ public class TieredMergePolicy extends M
// over time:
skew = 1.0/maxMergeAtOnce;
} else {
- skew = ((double) floorSize(size(candidate.info(0))))/totAfterMergeBytesFloored;
+ skew = ((double) floorSize(size(candidate.get(0))))/totAfterMergeBytesFloored;
}
// Strongly favor merges with less skew (smaller
@@ -458,7 +460,8 @@ public class TieredMergePolicy extends M
if (verbose()) {
message("findMergesForOptimize maxSegmentCount=" + maxSegmentCount + " infos=" + writer.get().segString(infos) + " segmentsToOptimize=" + segmentsToOptimize);
}
- SegmentInfos eligible = new SegmentInfos();
+
+ List<SegmentInfo> eligible = new ArrayList<SegmentInfo>();
boolean optimizeMergeRunning = false;
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
for(SegmentInfo info : infos) {
@@ -499,7 +502,7 @@ public class TieredMergePolicy extends M
if (spec == null) {
spec = new MergeSpecification();
}
- final OneMerge merge = new OneMerge(eligible.range(end-maxMergeAtOnceExplicit, end));
+ final OneMerge merge = new OneMerge(eligible.subList(end-maxMergeAtOnceExplicit, end));
if (verbose()) {
message("add merge=" + writer.get().segString(merge.segments));
}
@@ -510,7 +513,7 @@ public class TieredMergePolicy extends M
if (spec == null && !optimizeMergeRunning) {
// Do final merge
final int numToMerge = end - maxSegmentCount + 1;
- final OneMerge merge = new OneMerge(eligible.range(end-numToMerge, end));
+ final OneMerge merge = new OneMerge(eligible.subList(end-numToMerge, end));
if (verbose()) {
message("add final merge=" + merge.segString(writer.get().getDirectory()));
}
@@ -527,7 +530,7 @@ public class TieredMergePolicy extends M
if (verbose()) {
message("findMergesToExpungeDeletes infos=" + writer.get().segString(infos) + " expungeDeletesPctAllowed=" + expungeDeletesPctAllowed);
}
- final SegmentInfos eligible = new SegmentInfos();
+ final List<SegmentInfo> eligible = new ArrayList<SegmentInfo>();
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
for(SegmentInfo info : infos) {
double pctDeletes = 100.*((double) writer.get().numDeletedDocs(info))/info.docCount;
@@ -580,7 +583,7 @@ public class TieredMergePolicy extends M
spec = new MergeSpecification();
}
- final OneMerge merge = new OneMerge(eligible.range(start, upto));
+ final OneMerge merge = new OneMerge(eligible.subList(start, upto));
if (verbose()) {
message("add merge=" + writer.get().segString(merge.segments));
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java Fri May 13 11:18:19 2011
@@ -72,6 +72,11 @@ public class CodecProvider {
}
}
}
+
+ /** @lucene.internal */
+ public synchronized Set<String> listAll() {
+ return codecs.keySet();
+ }
public Collection<String> getAllExtensions() {
return knownExtensions;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java Fri May 13 11:18:19 2011
@@ -68,15 +68,8 @@ public class PulsingPostingsReaderImpl e
@Override
public Object clone() {
- PulsingTermState clone;
- clone = (PulsingTermState) super.clone();
- if (postingsSize != -1) {
- clone.postings = new byte[postingsSize];
- System.arraycopy(postings, 0, clone.postings, 0, postingsSize);
- } else {
- assert wrappedTermState != null;
- clone.wrappedTermState = (BlockTermState) wrappedTermState.clone();
- }
+ PulsingTermState clone = new PulsingTermState();
+ clone.copyFrom(this);
return clone;
}
@@ -90,8 +83,10 @@ public class PulsingPostingsReaderImpl e
postings = new byte[ArrayUtil.oversize(other.postingsSize, 1)];
}
System.arraycopy(other.postings, 0, postings, 0, other.postingsSize);
- } else {
+ } else if (wrappedTermState != null) {
wrappedTermState.copyFrom(other.wrappedTermState);
+ } else {
+ wrappedTermState = (BlockTermState) other.wrappedTermState.clone();
}
// NOTE: we do not copy the
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Fri May 13 11:18:19 2011
@@ -85,7 +85,7 @@ public class SepPostingsReaderImpl exten
}
}
- public static void files(SegmentInfo segmentInfo, String codecId, Collection<String> files) {
+ public static void files(SegmentInfo segmentInfo, String codecId, Collection<String> files) throws IOException {
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.DOC_EXTENSION));
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION));
@@ -151,14 +151,8 @@ public class SepPostingsReaderImpl exten
@Override
public Object clone() {
- SepTermState other = (SepTermState) super.clone();
- other.docIndex = (IntIndexInput.Index) docIndex.clone();
- if (freqIndex != null) {
- other.freqIndex = (IntIndexInput.Index) freqIndex.clone();
- }
- if (posIndex != null) {
- other.posIndex = (IntIndexInput.Index) posIndex.clone();
- }
+ SepTermState other = new SepTermState();
+ other.copyFrom(this);
return other;
}
@@ -166,12 +160,28 @@ public class SepPostingsReaderImpl exten
public void copyFrom(TermState _other) {
super.copyFrom(_other);
SepTermState other = (SepTermState) _other;
- docIndex.set(other.docIndex);
- if (freqIndex != null && other.freqIndex != null) {
- freqIndex.set(other.freqIndex);
+ if (docIndex == null) {
+ docIndex = (IntIndexInput.Index) other.docIndex.clone();
+ } else {
+ docIndex.set(other.docIndex);
+ }
+ if (other.freqIndex != null) {
+ if (freqIndex == null) {
+ freqIndex = (IntIndexInput.Index) other.freqIndex.clone();
+ } else {
+ freqIndex.set(other.freqIndex);
+ }
+ } else {
+ freqIndex = null;
}
- if (posIndex != null && other.posIndex != null) {
- posIndex.set(other.posIndex);
+ if (other.posIndex != null) {
+ if (posIndex == null) {
+ posIndex = (IntIndexInput.Index) other.posIndex.clone();
+ } else {
+ posIndex.set(other.posIndex);
+ }
+ } else {
+ posIndex = null;
}
payloadFP = other.payloadFP;
skipFP = other.skipFP;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java Fri May 13 11:18:19 2011
@@ -806,6 +806,7 @@ public abstract class QueryParserBase {
}
try {
+ source.end();
source.close();
} catch (IOException ignored) {}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/HitQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/HitQueue.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/HitQueue.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/HitQueue.java Fri May 13 11:18:19 2011
@@ -21,8 +21,6 @@ import org.apache.lucene.util.PriorityQu
final class HitQueue extends PriorityQueue<ScoreDoc> {
- private boolean prePopulate;
-
/**
* Creates a new instance with <code>size</code> elements. If
* <code>prePopulate</code> is set to true, the queue will pre-populate itself
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/IndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/IndexSearcher.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/IndexSearcher.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/IndexSearcher.java Fri May 13 11:18:19 2011
@@ -46,8 +46,18 @@ import org.apache.lucene.util.ThreadInte
*
* <p>Applications usually need only call the inherited
* {@link #search(Query,int)}
- * or {@link #search(Query,Filter,int)} methods. For performance reasons it is
- * recommended to open only one IndexSearcher and use it for all of your searches.
+ * or {@link #search(Query,Filter,int)} methods. For
+ * performance reasons, if your index is unchanging, you
+ * should share a single IndexSearcher instance across
+ * multiple searches instead of creating a new one
+ * per-search. If your index has changed and you wish to
+ * see the changes reflected in searching, you should
+ * use {@link IndexReader#reopen} to obtain a new reader and
+ * then create a new IndexSearcher from that. Also, for
+ * low-latency turnaround it's best to use a near-real-time
+ * reader ({@link IndexReader#open(IndexWriter,boolean)}).
+ * Once you have a new {@link IndexReader}, it's relatively
+ * cheap to create a new IndexSearcher from it.
*
* <a name="thread-safety"></a><p><b>NOTE</b>: <code>{@link
* IndexSearcher}</code> instances are completely
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java Fri May 13 11:18:19 2011
@@ -214,12 +214,12 @@ public class MultiPhraseQuery extends Qu
docFreq = reader.docFreq(term.field(), term.bytes());
}
- postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue());
+ postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]);
}
// sort by increasing docFreq order
if (slop == 0) {
- ArrayUtil.quickSort(postingsFreqs);
+ ArrayUtil.mergeSort(postingsFreqs);
}
if (slop == 0) {
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhrasePositions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhrasePositions.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhrasePositions.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhrasePositions.java Fri May 13 11:18:19 2011
@@ -28,13 +28,15 @@ final class PhrasePositions {
int position; // position in doc
int count; // remaining pos in this doc
int offset; // position in phrase
+ final int ord; // unique across all PhrasePositions instances
final DocsAndPositionsEnum postings; // stream of docs & positions
PhrasePositions next; // used to make lists
boolean repeats; // there's other pp for same term (e.g. query="1st word 2nd word"~1)
- PhrasePositions(DocsAndPositionsEnum postings, int o) {
+ PhrasePositions(DocsAndPositionsEnum postings, int o, int ord) {
this.postings = postings;
offset = o;
+ this.ord = ord;
}
final boolean next() throws IOException { // increments to next doc
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseQuery.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseQuery.java Fri May 13 11:18:19 2011
@@ -124,16 +124,48 @@ public class PhraseQuery extends Query {
final DocsAndPositionsEnum postings;
final int docFreq;
final int position;
+ final Term term;
- public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position) {
+ public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term term) {
this.postings = postings;
this.docFreq = docFreq;
this.position = position;
+ this.term = term;
}
public int compareTo(PostingsAndFreq other) {
+ if (docFreq == other.docFreq) {
+ if (position == other.position) {
+ return term.compareTo(other.term);
+ }
+ return position - other.position;
+ }
return docFreq - other.docFreq;
}
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + docFreq;
+ result = prime * result + position;
+ result = prime * result + ((term == null) ? 0 : term.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) return true;
+ if (obj == null) return false;
+ if (getClass() != obj.getClass()) return false;
+ PostingsAndFreq other = (PostingsAndFreq) obj;
+ if (docFreq != other.docFreq) return false;
+ if (position != other.position) return false;
+ if (term == null) {
+ if (other.term != null) return false;
+ } else if (!term.equals(other.term)) return false;
+ return true;
+ }
}
private class PhraseWeight extends Weight {
@@ -197,12 +229,12 @@ public class PhraseQuery extends Query {
return null;
}
}
- postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue());
+ postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue(), t);
}
// sort by increasing docFreq order
if (slop == 0) {
- ArrayUtil.quickSort(postingsFreqs);
+ ArrayUtil.mergeSort(postingsFreqs);
}
if (slop == 0) { // optimize exact case
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseQueue.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseQueue.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseQueue.java Fri May 13 11:18:19 2011
@@ -30,10 +30,16 @@ final class PhraseQueue extends Priority
if (pp1.position == pp2.position)
// same doc and pp.position, so decide by actual term positions.
// rely on: pp.position == tp.position - offset.
- return pp1.offset < pp2.offset;
- else
+ if (pp1.offset == pp2.offset) {
+ return pp1.ord < pp2.ord;
+ } else {
+ return pp1.offset < pp2.offset;
+ }
+ else {
return pp1.position < pp2.position;
- else
+ }
+ else {
return pp1.doc < pp2.doc;
+ }
}
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseScorer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseScorer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/PhraseScorer.java Fri May 13 11:18:19 2011
@@ -55,7 +55,7 @@ abstract class PhraseScorer extends Scor
// this allows to easily identify a matching (exact) phrase
// when all PhrasePositions have exactly the same position.
for (int i = 0; i < postings.length; i++) {
- PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position);
+ PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i);
if (last != null) { // add next to end of list
last.next = pp;
} else {
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java Fri May 13 11:18:19 2011
@@ -134,7 +134,7 @@ public abstract class TopTermsRewrite<Q
final Term placeholderTerm = new Term(query.field);
final Q q = getTopLevelQuery();
final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
- ArrayUtil.quickSort(scoreTerms, scoreTermSortByTermComp);
+ ArrayUtil.mergeSort(scoreTerms, scoreTermSortByTermComp);
for (final ScoreTerm st : scoreTerms) {
final Term term = placeholderTerm.createTerm(st.bytes);
assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq();
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java Fri May 13 11:18:19 2011
@@ -190,7 +190,7 @@ public class NearSpansOrdered extends Sp
/** Advance the subSpans to the same document */
private boolean toSameDoc() throws IOException {
- ArrayUtil.quickSort(subSpansByDoc, spanDocComparator);
+ ArrayUtil.mergeSort(subSpansByDoc, spanDocComparator);
int firstIndex = 0;
int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc();
while (subSpansByDoc[firstIndex].doc() != maxDoc) {
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/SorterTemplate.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/SorterTemplate.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/SorterTemplate.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/SorterTemplate.java Fri May 13 11:18:19 2011
@@ -62,13 +62,26 @@ public abstract class SorterTemplate {
/** Sorts via in-place, but unstable, QuickSort algorithm.
* For small collections falls back to {@link #insertionSort(int,int)}. */
- public final void quickSort(int lo, int hi) {
+ public final void quickSort(final int lo, final int hi) {
+ if (hi <= lo) return;
+ // from Integer's Javadocs: ceil(log2(x)) = 32 - numberOfLeadingZeros(x - 1)
+ quickSort(lo, hi, (Integer.SIZE - Integer.numberOfLeadingZeros(hi - lo)) << 1);
+ }
+
+ private void quickSort(int lo, int hi, int maxDepth) {
+ // fall back to insertion when array has short length
final int diff = hi - lo;
if (diff <= QUICKSORT_THRESHOLD) {
insertionSort(lo, hi);
return;
}
+ // fall back to merge sort when recursion depth gets too big
+ if (--maxDepth == 0) {
+ mergeSort(lo, hi);
+ return;
+ }
+
final int mid = lo + (diff >>> 1);
if (compare(lo, mid) > 0) {
@@ -101,8 +114,8 @@ public abstract class SorterTemplate {
}
}
- quickSort(lo, left);
- quickSort(left + 1, hi);
+ quickSort(lo, left, maxDepth);
+ quickSort(left + 1, hi, maxDepth);
}
/** Sorts via stable in-place MergeSort algorithm
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java Fri May 13 11:18:19 2011
@@ -261,9 +261,12 @@ public class Builder<T> {
add(scratchIntsRef, output);
}
+ /** It's OK to add the same input twice in a row with
+ * different outputs, as long as outputs impls the merge
+ * method. */
public void add(IntsRef input, T output) throws IOException {
//System.out.println("\nFST ADD: input=" + input + " output=" + fst.outputs.outputToString(output));
- assert lastInput.length == 0 || input.compareTo(lastInput) > 0: "inputs are added out of order lastInput=" + lastInput + " vs input=" + input;
+ assert lastInput.length == 0 || input.compareTo(lastInput) >= 0: "inputs are added out of order lastInput=" + lastInput + " vs input=" + input;
assert validOutput(output);
//System.out.println("\nadd: " + input);
@@ -347,8 +350,15 @@ public class Builder<T> {
assert validOutput(output);
}
- // push remaining output:
- frontier[prefixLenPlus1-1].setLastOutput(input.ints[input.offset + prefixLenPlus1-1], output);
+ if (lastInput.length == input.length && prefixLenPlus1 == 1+input.length) {
+ // same input more than 1 time in a row, mapping to
+ // multiple outputs
+ lastNode.output = fst.outputs.merge(lastNode.output, output);
+ } else {
+ // this new arc is private to this new input; set its
+ // arc output to the leftover output:
+ frontier[prefixLenPlus1-1].setLastOutput(input.ints[input.offset + prefixLenPlus1-1], output);
+ }
// save last input
lastInput.copy(input);
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java Fri May 13 11:18:19 2011
@@ -231,10 +231,13 @@ public class FST<T> {
}
void setEmptyOutput(T v) throws IOException {
- if (emptyOutput != null && !emptyOutput.equals(v)) {
- throw new IllegalStateException("empty output is already set: " + outputs.outputToString(emptyOutput) + " vs " + outputs.outputToString(v));
+ if (emptyOutput != null) {
+ if (!emptyOutput.equals(v)) {
+ emptyOutput = outputs.merge(emptyOutput, v);
+ }
+ } else {
+ emptyOutput = v;
}
- emptyOutput = v;
// TODO: this is messy -- replace with sillyBytesWriter; maybe make
// bytes private
@@ -446,25 +449,17 @@ public class FST<T> {
// reverse bytes in-place; we do this so that the
// "BIT_TARGET_NEXT" opto can work, ie, it reads the
// node just before the current one
- final int endAddress = writer.posWrite;
- final int stopAt = (endAddress - startAddress)/2;
- int upto = 0;
- while (upto < stopAt) {
- final byte b = bytes[startAddress+upto];
- bytes[startAddress+upto] = bytes[endAddress-upto-1];
- bytes[endAddress-upto-1] = b;
- upto++;
- }
+ final int endAddress = lastFrozenNode = writer.posWrite - 1;
- lastFrozenNode = endAddress - 1;
- /*
- System.out.println(" return node addr=" + (endAddress-1));
- for(int i=endAddress-1;i>=startAddress;i--) {
- System.out.println(" bytes[" + i + "]=" + bytes[i]);
+ int left = startAddress;
+ int right = endAddress;
+ while (left < right) {
+ final byte b = bytes[left];
+ bytes[left++] = bytes[right];
+ bytes[right--] = b;
}
- */
- return endAddress-1;
+ return endAddress;
}
/** Fills virtual 'start' arc, ie, an empty incoming arc to
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java Fri May 13 11:18:19 2011
@@ -140,7 +140,7 @@ abstract class FSTEnum<T> {
// Arcs are fixed array -- use binary search to find
// the target.
- final FST.BytesReader in = fst.getBytesReader(0);
+ final FST<T>.BytesReader in = fst.getBytesReader(0);
int low = arc.arcIdx;
int high = arc.numArcs-1;
int mid = 0;
@@ -278,7 +278,7 @@ abstract class FSTEnum<T> {
// Arcs are fixed array -- use binary search to find
// the target.
- final FST.BytesReader in = fst.getBytesReader(0);
+ final FST<T>.BytesReader in = fst.getBytesReader(0);
int low = arc.arcIdx;
int high = arc.numArcs-1;
int mid = 0;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java Fri May 13 11:18:19 2011
@@ -40,7 +40,7 @@ final class NodeHash<T> {
return false;
}
for(int arcUpto=0;arcUpto<node.numArcs;arcUpto++) {
- final Builder.Arc arc = node.arcs[arcUpto];
+ final Builder.Arc<T> arc = node.arcs[arcUpto];
if (arc.label != scratchArc.label ||
!arc.output.equals(scratchArc.output) ||
((Builder.CompiledNode) arc.target).address != scratchArc.target ||
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/Outputs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/Outputs.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/Outputs.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/Outputs.java Fri May 13 11:18:19 2011
@@ -54,4 +54,8 @@ public abstract class Outputs<T> {
public abstract T getNoOutput();
public abstract String outputToString(T output);
+
+ public T merge(T first, T second) {
+ throw new UnsupportedOperationException();
+ }
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java Fri May 13 11:18:19 2011
@@ -43,7 +43,7 @@ public class PairOutputs<A,B> extends Ou
this.output2 = output2;
}
- @Override @SuppressWarnings("unchecked")
+ @Override @SuppressWarnings("rawtypes")
public boolean equals(Object other) {
if (other == this) {
return true;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/PositiveIntOutputs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/PositiveIntOutputs.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/PositiveIntOutputs.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/util/automaton/fst/PositiveIntOutputs.java Fri May 13 11:18:19 2011
@@ -22,14 +22,11 @@ import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
-// TODO: make a sharing and non-sharing variant; eg if you
-// output docFreq per term the FST will be smaller if you
-// don't share since they are not "well shared"
-
/**
* Output is a long, for each input term. NOTE: the
* resulting FST is not guaranteed to be minimal! See
- * {@link Builder}.
+ * {@link Builder}. You cannot store 0 output with this
+ * (that's reserved to mean "no output")!
* @lucene.experimental
*/
Modified: lucene/dev/branches/docvalues/lucene/src/site/src/documentation/content/xdocs/fileformats.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/site/src/documentation/content/xdocs/fileformats.xml?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/site/src/documentation/content/xdocs/fileformats.xml (original)
+++ lucene/dev/branches/docvalues/lucene/src/site/src/documentation/content/xdocs/fileformats.xml Fri May 13 11:18:19 2011
@@ -90,6 +90,14 @@
<p>
In version 3.1, segments records the code version
that created them. See LUCENE-2720 for details.
+
+ Additionally segments track explicitly whether or
+ not they have term vectors. See LUCENE-2811 for details.
+ </p>
+ <p>
+ In version 3.2, numeric fields are written natively
+ to the stored fields file; previously they were stored
+ in text format only.
</p>
</section>
@@ -935,7 +943,7 @@
<b>3.1</b>
Segments --> Format, Version, NameCounter, SegCount, <SegVersion, SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
NormGen<sup>NumField</sup>,
- IsCompoundFile, DeletionCount, HasProx, Diagnostics><sup>SegCount</sup>, CommitUserData, Checksum
+ IsCompoundFile, DeletionCount, HasProx, Diagnostics, HasVectors><sup>SegCount</sup>, CommitUserData, Checksum
</p>
<p>
@@ -957,7 +965,7 @@
<p>
IsCompoundFile, HasSingleNormFile,
- DocStoreIsCompoundFile, HasProx --> Int8
+ DocStoreIsCompoundFile, HasProx, HasVectors --> Int8
</p>
<p>
@@ -1083,6 +1091,10 @@
Lucene version, OS, Java version, why the segment
was created (merge, flush, addIndexes), etc.
</p>
+
+ <p> HasVectors is 1 if this segment stores term vectors,
+ else it's 0.
+ </p>
</section>
@@ -1293,10 +1305,18 @@
<li>third bit is one for fields with compression option enabled
(if compression is enabled, the algorithm used is ZLIB),
only available for indexes until Lucene version 2.9.x</li>
+ <li>4th to 6th bits (mask: 0x7<<3) define the type of a
+ numeric field: <ul>
+ <li>all bits in mask are cleared if no numeric field at all</li>
+ <li>1<<3: Value is Int</li>
+ <li>2<<3: Value is Long</li>
+ <li>3<<3: Value is Int as Float (as of Integer.intBitsToFloat)</li>
+ <li>4<<3: Value is Long as Double (as of Double.longBitsToDouble)</li>
+ </ul></li>
</ul>
</p>
<p>Value -->
- String | BinaryValue (depending on Bits)
+ String | BinaryValue | Int | Long (depending on Bits)
</p>
<p>BinaryValue -->
ValueSize, <Byte>^ValueSize
Modified: lucene/dev/branches/docvalues/lucene/src/site/src/documentation/content/xdocs/gettingstarted.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/site/src/documentation/content/xdocs/gettingstarted.xml?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/site/src/documentation/content/xdocs/gettingstarted.xml (original)
+++ lucene/dev/branches/docvalues/lucene/src/site/src/documentation/content/xdocs/gettingstarted.xml Fri May 13 11:18:19 2011
@@ -28,11 +28,11 @@ may wish to skip sections.
<ul>
<li><a href="demo.html">About the command-line Lucene demo and its usage</a>. This section
- is intended for anyone who wants to use the command-line Lucene demo.</li> <p/>
+ is intended for anyone who wants to use the command-line Lucene demo.</li>
<li><a href="demo2.html">About the sources and implementation for the command-line Lucene
demo</a>. This section walks through the implementation details (sources) of the
- command-line Lucene demo. This section is intended for developers.</li> <p/>
+ command-line Lucene demo. This section is intended for developers.</li>
</ul>
</section>
Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Fri May 13 11:18:19 2011
@@ -262,6 +262,7 @@ public abstract class BaseTokenStreamTes
tokens.add(termAtt.toString());
// TODO: we could collect offsets etc here for better checking that reset() really works.
}
+ ts.end();
ts.close();
// verify reusing is "reproducable" and also get the normal tokenstream sanity checks
if (!tokens.isEmpty())
Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java Fri May 13 11:18:19 2011
@@ -36,6 +36,7 @@ public final class MockAnalyzer extends
private int positionIncrementGap;
private final Random random;
private Map<String,Integer> previousMappings = new HashMap<String,Integer>();
+ private boolean enableChecks = true;
/**
* Creates a new MockAnalyzer.
@@ -75,6 +76,7 @@ public final class MockAnalyzer extends
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
+ tokenizer.setEnableChecks(enableChecks);
TokenFilter filt = new MockTokenFilter(tokenizer, filter, enablePositionIncrements);
filt = maybePayload(filt, fieldName);
return filt;
@@ -98,13 +100,13 @@ public final class MockAnalyzer extends
if (saved == null) {
saved = new SavedStreams();
saved.tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
+ saved.tokenizer.setEnableChecks(enableChecks);
saved.filter = new MockTokenFilter(saved.tokenizer, filter, enablePositionIncrements);
saved.filter = maybePayload(saved.filter, fieldName);
map.put(fieldName, saved);
return saved.filter;
} else {
saved.tokenizer.reset(reader);
- saved.filter.reset();
return saved.filter;
}
}
@@ -139,4 +141,12 @@ public final class MockAnalyzer extends
public int getPositionIncrementGap(String fieldName){
return positionIncrementGap;
}
+
+ /**
+ * Toggle consumer workflow checking: if your test consumes tokenstreams normally you
+ * should leave this enabled.
+ */
+ public void setEnableChecks(boolean enableChecks) {
+ this.enableChecks = enableChecks;
+ }
}
Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockPayloadAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockPayloadAnalyzer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockPayloadAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockPayloadAnalyzer.java Fri May 13 11:18:19 2011
@@ -86,6 +86,7 @@ final class MockPayloadFilter extends To
@Override
public void reset() throws IOException {
+ super.reset();
i = 0;
pos = 0;
}
Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java Fri May 13 11:18:19 2011
@@ -20,14 +20,15 @@ package org.apache.lucene.analysis;
import java.io.IOException;
import java.io.Reader;
-import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
/**
* Automaton-based tokenizer for testing. Optionally lowercases.
*/
-public class MockTokenizer extends CharTokenizer {
+public class MockTokenizer extends Tokenizer {
/** Acts Similar to WhitespaceTokenizer */
public static final CharacterRunAutomaton WHITESPACE =
new CharacterRunAutomaton(new RegExp("[^ \t\r\n]+").toAutomaton());
@@ -45,21 +46,88 @@ public class MockTokenizer extends CharT
private final boolean lowerCase;
private int state;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ int off = 0;
+
+ // TODO: "register" with LuceneTestCase to ensure all streams are closed() ?
+ // currently, we can only check that the lifecycle is correct if someone is reusing,
+ // but not for "one-offs".
+ private static enum State {
+ SETREADER, // consumer set a reader input either via ctor or via reset(Reader)
+ RESET, // consumer has called reset()
+ INCREMENT, // consumer is consuming, has called incrementToken() == true
+ INCREMENT_FALSE, // consumer has called incrementToken() which returned false
+ END, // consumer has called end() to perform end of stream operations
+ CLOSE // consumer has called close() to release any resources
+ };
+
+ private State streamState = State.CLOSE;
+ private boolean enableChecks = true;
+
public MockTokenizer(AttributeFactory factory, Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
- super(LuceneTestCase.TEST_VERSION_CURRENT, factory, input);
+ super(factory, input);
this.runAutomaton = runAutomaton;
this.lowerCase = lowerCase;
this.state = runAutomaton.getInitialState();
+ this.streamState = State.SETREADER;
}
public MockTokenizer(Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
- super(LuceneTestCase.TEST_VERSION_CURRENT, input);
+ super(input);
this.runAutomaton = runAutomaton;
this.lowerCase = lowerCase;
this.state = runAutomaton.getInitialState();
+ this.streamState = State.SETREADER;
}
@Override
+ public final boolean incrementToken() throws IOException {
+ assert !enableChecks || (streamState == State.RESET || streamState == State.INCREMENT)
+ : "incrementToken() called while in wrong state: " + streamState;
+ clearAttributes();
+ for (;;) {
+ int startOffset = off;
+ int cp = readCodePoint();
+ if (cp < 0) {
+ break;
+ } else if (isTokenChar(cp)) {
+ int endOffset;
+ do {
+ char chars[] = Character.toChars(normalize(cp));
+ for (int i = 0; i < chars.length; i++)
+ termAtt.append(chars[i]);
+ endOffset = off;
+ cp = readCodePoint();
+ } while (cp >= 0 && isTokenChar(cp));
+ offsetAtt.setOffset(startOffset, endOffset);
+ streamState = State.INCREMENT;
+ return true;
+ }
+ }
+ streamState = State.INCREMENT_FALSE;
+ return false;
+ }
+
+ protected int readCodePoint() throws IOException {
+ int ch = input.read();
+ if (ch < 0) {
+ return ch;
+ } else {
+ assert !Character.isLowSurrogate((char) ch);
+ off++;
+ if (Character.isHighSurrogate((char) ch)) {
+ int ch2 = input.read();
+ if (ch2 >= 0) {
+ off++;
+ assert Character.isLowSurrogate((char) ch2);
+ return Character.toCodePoint((char) ch, (char) ch2);
+ }
+ }
+ return ch;
+ }
+ }
+
protected boolean isTokenChar(int c) {
state = runAutomaton.step(state, c);
if (state < 0) {
@@ -70,7 +138,6 @@ public class MockTokenizer extends CharT
}
}
- @Override
protected int normalize(int c) {
return lowerCase ? Character.toLowerCase(c) : c;
}
@@ -79,5 +146,43 @@ public class MockTokenizer extends CharT
public void reset() throws IOException {
super.reset();
state = runAutomaton.getInitialState();
+ off = 0;
+ assert !enableChecks || streamState != State.RESET : "double reset()";
+ streamState = State.RESET;
+ }
+
+ @Override
+ public void close() throws IOException {
+ super.close();
+ // in some exceptional cases (e.g. TestIndexWriterExceptions) a test can prematurely close()
+ // these tests should disable this check, by default we check the normal workflow.
+ // TODO: investigate the CachingTokenFilter "double-close"... for now we ignore this
+ assert !enableChecks || streamState == State.END || streamState == State.CLOSE : "close() called in wrong state: " + streamState;
+ streamState = State.CLOSE;
+ }
+
+ @Override
+ public void reset(Reader input) throws IOException {
+ super.reset(input);
+ assert !enableChecks || streamState == State.CLOSE : "setReader() called in wrong state: " + streamState;
+ streamState = State.SETREADER;
+ }
+
+ @Override
+ public void end() throws IOException {
+ int finalOffset = correctOffset(off);
+ offsetAtt.setOffset(finalOffset, finalOffset);
+ // some tokenizers, such as limiting tokenizers, call end() before incrementToken() returns false.
+ // these tests should disable this check (in general you should consume the entire stream)
+ assert !enableChecks || streamState == State.INCREMENT_FALSE : "end() called before incrementToken() returned false!";
+ streamState = State.END;
+ }
+
+ /**
+ * Toggle consumer workflow checking: if your test consumes tokenstreams normally you
+ * should leave this enabled.
+ */
+ public void setEnableChecks(boolean enableChecks) {
+ this.enableChecks = enableChecks;
}
}
Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java Fri May 13 11:18:19 2011
@@ -18,7 +18,9 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collections;
+import java.util.List;
import java.util.Random;
import java.util.Set;
@@ -58,21 +60,36 @@ public class MockRandomMergePolicy exten
SegmentInfos segmentInfos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize)
throws CorruptIndexException, IOException {
- //System.out.println("MRMP: findMergesForOptimize sis=" + segmentInfos);
+ final List<SegmentInfo> eligibleSegments = new ArrayList<SegmentInfo>();
+ for(SegmentInfo info : segmentInfos) {
+ if (segmentsToOptimize.contains(info)) {
+ eligibleSegments.add(info);
+ }
+ }
+
+ //System.out.println("MRMP: findMergesForOptimize sis=" + segmentInfos + " eligible=" + eligibleSegments);
MergeSpecification mergeSpec = null;
- if (segmentInfos.size() > 1 || (segmentInfos.size() == 1 && segmentInfos.info(0).hasDeletions())) {
+ if (eligibleSegments.size() > 1 || (eligibleSegments.size() == 1 && eligibleSegments.get(0).hasDeletions())) {
mergeSpec = new MergeSpecification();
- SegmentInfos segmentInfos2 = new SegmentInfos();
- segmentInfos2.addAll(segmentInfos);
- Collections.shuffle(segmentInfos2, random);
+ // Already shuffled having come out of a set but
+ // shuffle again for good measure:
+ Collections.shuffle(eligibleSegments, random);
int upto = 0;
- while(upto < segmentInfos.size()) {
- int max = Math.min(10, segmentInfos.size()-upto);
+ while(upto < eligibleSegments.size()) {
+ int max = Math.min(10, eligibleSegments.size()-upto);
int inc = max <= 2 ? max : _TestUtil.nextInt(random, 2, max);
- mergeSpec.add(new OneMerge(segmentInfos2.range(upto, upto+inc)));
+ mergeSpec.add(new OneMerge(eligibleSegments.subList(upto, upto+inc)));
upto += inc;
}
}
+
+ if (mergeSpec != null) {
+ for(OneMerge merge : mergeSpec.merges) {
+ for(SegmentInfo info : merge.segments) {
+ assert segmentsToOptimize.contains(info);
+ }
+ }
+ }
return mergeSpec;
}
Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java Fri May 13 11:18:19 2011
@@ -146,6 +146,9 @@ public class MockRandomCodec extends Cod
out.close();
final Random random = new Random(seed);
+
+ random.nextInt(); // consume a random for buffersize
+
PostingsWriterBase postingsWriter;
if (random.nextBoolean()) {
@@ -244,16 +247,22 @@ public class MockRandomCodec extends Cod
in.close();
final Random random = new Random(seed);
+
+ int readBufferSize = _TestUtil.nextInt(random, 1, 4096);
+ if (LuceneTestCase.VERBOSE) {
+ System.out.println("MockRandomCodec: readBufferSize=" + readBufferSize);
+ }
+
PostingsReaderBase postingsReader;
if (random.nextBoolean()) {
postingsReader = new SepPostingsReaderImpl(state.dir, state.segmentInfo,
- state.readBufferSize, new MockIntStreamFactory(random), state.codecId);
+ readBufferSize, new MockIntStreamFactory(random), state.codecId);
} else {
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading Standard postings");
}
- postingsReader = new StandardPostingsReader(state.dir, state.segmentInfo, state.readBufferSize, state.codecId);
+ postingsReader = new StandardPostingsReader(state.dir, state.segmentInfo, readBufferSize, state.codecId);
}
if (random.nextBoolean()) {
@@ -318,7 +327,7 @@ public class MockRandomCodec extends Cod
state.fieldInfos,
state.segmentInfo.name,
postingsReader,
- state.readBufferSize,
+ readBufferSize,
termsCacheSize,
state.codecId);
success = true;
Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java Fri May 13 11:18:19 2011
@@ -25,7 +25,6 @@ import org.apache.lucene.index.SegmentIn
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.codecs.Codec;
-import org.apache.lucene.index.codecs.DocValuesConsumer;
import org.apache.lucene.index.codecs.DefaultDocValuesProducer;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java Fri May 13 11:18:19 2011
@@ -71,6 +71,7 @@ public class MockDirectoryWrapper extend
Set<String> openFilesForWrite = new HashSet<String>();
volatile boolean crashed;
private ThrottledIndexOutput throttledOutput;
+ private Throttling throttling = Throttling.SOMETIMES;
// use this for tracking files for crash.
// additionally: provides debugging information in case you leave one open
@@ -104,6 +105,8 @@ public class MockDirectoryWrapper extend
// called from different threads; else test failures may
// not be reproducible from the original seed
this.randomState = new Random(random.nextInt());
+ this.throttledOutput = new ThrottledIndexOutput(ThrottledIndexOutput
+ .mBitsToBytes(40 + randomState.nextInt(10)), 5 + randomState.nextInt(5), null);
init();
}
@@ -117,8 +120,17 @@ public class MockDirectoryWrapper extend
preventDoubleWrite = value;
}
- public void setThrottledIndexOutput(ThrottledIndexOutput throttledOutput) {
- this.throttledOutput = throttledOutput;
+ public static enum Throttling {
+ /** always emulate a slow hard disk. could be very slow! */
+ ALWAYS,
+ /** sometimes (2% of the time) emulate a slow hard disk. */
+ SOMETIMES,
+ /** never throttle output */
+ NEVER
+ };
+
+ public void setThrottling(Throttling throttling) {
+ this.throttling = throttling;
}
@Override
@@ -354,7 +366,17 @@ public class MockDirectoryWrapper extend
IndexOutput io = new MockIndexOutputWrapper(this, delegate.createOutput(name), name);
openFileHandles.put(io, new RuntimeException("unclosed IndexOutput"));
openFilesForWrite.add(name);
- return throttledOutput == null ? io : throttledOutput.newFromDelegate(io);
+
+ // throttling REALLY slows down tests, so don't do it very often for SOMETIMES.
+ if (throttling == Throttling.ALWAYS ||
+ (throttling == Throttling.SOMETIMES && randomState.nextInt(50) == 0)) {
+ if (LuceneTestCase.VERBOSE) {
+ System.out.println("MockDirectoryWrapper: throttling indexOutput");
+ }
+ return throttledOutput.newFromDelegate(io);
+ } else {
+ return io;
+ }
}
@Override
Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java Fri May 13 11:18:19 2011
@@ -137,6 +137,8 @@ public abstract class LuceneTestCase ext
// tests)
/** Gets the codec to run tests with. */
public static final String TEST_CODEC = System.getProperty("tests.codec", "randomPerField");
+ /** Gets the codecprovider to run tests with */
+ public static final String TEST_CODECPROVIDER = System.getProperty("tests.codecprovider", "random");
/** Gets the locale to run tests with */
public static final String TEST_LOCALE = System.getProperty("tests.locale", "random");
/** Gets the timezone to run tests with */
@@ -329,15 +331,38 @@ public abstract class LuceneTestCase ext
tempDirs.clear();
stores = Collections.synchronizedMap(new IdentityHashMap<MockDirectoryWrapper,StackTraceElement[]>());
savedCodecProvider = CodecProvider.getDefault();
- if ("randomPerField".equals(TEST_CODEC)) {
- if (random.nextInt(4) == 0) { // preflex-only setup
- codec = installTestCodecs("PreFlex", CodecProvider.getDefault());
- } else { // per-field setup
- CodecProvider.setDefault(new RandomCodecProvider(random));
+ if ("random".equals(TEST_CODECPROVIDER)) {
+ if ("randomPerField".equals(TEST_CODEC)) {
+ if (random.nextInt(4) == 0) { // preflex-only setup
+ codec = installTestCodecs("PreFlex", CodecProvider.getDefault());
+ } else { // per-field setup
+ CodecProvider.setDefault(new RandomCodecProvider(random));
+ codec = installTestCodecs(TEST_CODEC, CodecProvider.getDefault());
+ }
+ } else { // ordinary setup
codec = installTestCodecs(TEST_CODEC, CodecProvider.getDefault());
}
- } else { // ordinary setup
- codec = installTestCodecs(TEST_CODEC, CodecProvider.getDefault());
+ } else {
+ // someone specified their own codecprovider by class
+ try {
+ Class<? extends CodecProvider> cpClazz = Class.forName(TEST_CODECPROVIDER).asSubclass(CodecProvider.class);
+ CodecProvider cp = cpClazz.newInstance();
+ String codecName;
+ if (TEST_CODEC.startsWith("random")) { // TODO: somehow do random per-field?!
+ Set<String> codecSet = cp.listAll();
+ String availableCodecs[] = codecSet.toArray(new String[codecSet.size()]);
+ codecName = availableCodecs[random.nextInt(availableCodecs.length)];
+ } else {
+ codecName = TEST_CODEC;
+ }
+
+ codec = cp.lookup(codecName);
+ cp.setDefaultFieldCodec(codecName);
+ CodecProvider.setDefault(cp);
+ } catch (Exception e) {
+ System.err.println("Could not instantiate CodecProvider: " + TEST_CODECPROVIDER);
+ throw new RuntimeException(e);
+ }
}
savedLocale = Locale.getDefault();
locale = TEST_LOCALE.equals("random") ? randomLocale(random) : localeForName(TEST_LOCALE);
@@ -360,16 +385,13 @@ public abstract class LuceneTestCase ext
String codecDescription;
CodecProvider cp = CodecProvider.getDefault();
- if ("randomPerField".equals(TEST_CODEC)) {
- if (cp instanceof RandomCodecProvider)
- codecDescription = cp.toString();
- else
- codecDescription = "PreFlex";
+ if ("randomPerField".equals(TEST_CODEC) && cp instanceof RandomCodecProvider) {
+ codecDescription = cp.toString();
} else {
codecDescription = codec.toString();
}
- if (CodecProvider.getDefault() == savedCodecProvider)
+ if ("random".equals(TEST_CODECPROVIDER) && CodecProvider.getDefault() == savedCodecProvider)
removeTestCodecs(codec, CodecProvider.getDefault());
CodecProvider.setDefault(savedCodecProvider);
Locale.setDefault(savedLocale);
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java Fri May 13 11:18:19 2011
@@ -107,6 +107,7 @@ public class TestMockAnalyzer extends Ba
// consume
}
stream.end();
+ stream.close();
assertAnalyzesToReuse(analyzer, testString, new String[] { "t" });
}
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/Test2BTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/Test2BTerms.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/Test2BTerms.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/Test2BTerms.java Fri May 13 11:18:19 2011
@@ -153,7 +153,8 @@ public class Test2BTerms extends LuceneT
List<BytesRef> savedTerms = null;
- Directory dir = newFSDirectory(_TestUtil.getTempDir("2BTerms"));
+ MockDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BTerms"));
+ dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER);
//Directory dir = newFSDirectory(new File("/p/lucene/indices/2bindex"));
if (true) {