Posted to commits@lucene.apache.org by rm...@apache.org on 2011/05/14 15:51:59 UTC
svn commit: r1103112 [8/24] - in /lucene/dev/branches/flexscoring: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/ant/
dev-tools/idea/lucene/contrib/db/bdb-je/
dev-tools/idea/lucene/contrib/db/bdb/ dev-tools/idea/lucene/contr...
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Sat May 14 13:51:35 2011
@@ -20,8 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
import java.util.List;
import java.util.Set;
@@ -72,7 +70,6 @@ public abstract class LogMergePolicy ext
// out there wrote his own LMP ...
protected long maxMergeSizeForOptimize = Long.MAX_VALUE;
protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
- protected boolean requireContiguousMerge = false;
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
@@ -111,21 +108,6 @@ public abstract class LogMergePolicy ext
writer.get().message("LMP: " + message);
}
- /** If true, merges must be in-order slice of the
- * segments. If false, then the merge policy is free to
- * pick any segments. The default is false, which is
- * in general more efficient than true since it gives the
- * merge policy more freedom to pick closely sized
- * segments. */
- public void setRequireContiguousMerge(boolean v) {
- requireContiguousMerge = v;
- }
-
- /** See {@link #setRequireContiguousMerge}. */
- public boolean getRequireContiguousMerge() {
- return requireContiguousMerge;
- }
-
/** <p>Returns the number of segments that are merged at
* once and also controls the total number of segments
* allowed to accumulate in the index.</p> */
@@ -378,8 +360,6 @@ public abstract class LogMergePolicy ext
return null;
}
- // TODO: handle non-contiguous merge case differently?
-
// Find the newest (rightmost) segment that needs to
// be optimized (other segments may have been flushed
// since optimize started):
@@ -499,14 +479,6 @@ public abstract class LogMergePolicy ext
}
}
- private static class SortByIndex implements Comparator<SegmentInfoAndLevel> {
- public int compare(SegmentInfoAndLevel o1, SegmentInfoAndLevel o2) {
- return o1.index - o2.index;
- }
- }
-
- private static final SortByIndex sortByIndex = new SortByIndex();
-
/** Checks if any merges are now necessary and returns a
* {@link MergePolicy.MergeSpecification} if so. A merge
* is necessary when there are more than {@link
@@ -532,31 +504,24 @@ public abstract class LogMergePolicy ext
final SegmentInfo info = infos.info(i);
long size = size(info);
- // When we require contiguous merge, we still add the
- // segment to levels to avoid merging "across" a set
- // of segment being merged:
- if (!requireContiguousMerge && mergingSegments.contains(info)) {
- if (verbose()) {
- message("seg " + info.name + " already being merged; skip");
- }
- continue;
- }
-
// Floor tiny segments
if (size < 1) {
size = 1;
}
+
final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i);
levels.add(infoLevel);
+
if (verbose()) {
- message("seg " + info.name + " level=" + infoLevel.level + " size=" + size);
+ final long segBytes = sizeBytes(info);
+ String extra = mergingSegments.contains(info) ? " [merging]" : "";
+ if (size >= maxMergeSize) {
+ extra += " [skip: too large]";
+ }
+ message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format("%.3f MB", segBytes/1024/1024.) + extra);
}
}
- if (!requireContiguousMerge) {
- Collections.sort(levels);
- }
-
final float levelFloor;
if (minMergeSize <= 0)
levelFloor = (float) 0.0;
@@ -614,23 +579,29 @@ public abstract class LogMergePolicy ext
int end = start + mergeFactor;
while(end <= 1+upto) {
boolean anyTooLarge = false;
+ boolean anyMerging = false;
for(int i=start;i<end;i++) {
final SegmentInfo info = levels.get(i).info;
anyTooLarge |= (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs);
+ if (mergingSegments.contains(info)) {
+ anyMerging = true;
+ break;
+ }
}
- if (!anyTooLarge) {
+ if (anyMerging) {
+ // skip
+ } else if (!anyTooLarge) {
if (spec == null)
spec = new MergeSpecification();
- if (verbose()) {
- message(" " + start + " to " + end + ": add this merge");
- }
- Collections.sort(levels.subList(start, end), sortByIndex);
- final SegmentInfos mergeInfos = new SegmentInfos();
+ final List<SegmentInfo> mergeInfos = new ArrayList<SegmentInfo>();
for(int i=start;i<end;i++) {
mergeInfos.add(levels.get(i).info);
assert infos.contains(levels.get(i).info);
}
+ if (verbose()) {
+ message(" add merge=" + writer.get().segString(mergeInfos) + " start=" + start + " end=" + end);
+ }
spec.add(new OneMerge(mergeInfos));
} else if (verbose()) {
message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
@@ -682,7 +653,7 @@ public abstract class LogMergePolicy ext
sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
- sb.append("requireContiguousMerge=").append(requireContiguousMerge);
+ sb.append("noCFSRatio=").append(noCFSRatio);
sb.append("]");
return sb.toString();
}
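
As an aside on the level computation kept in the hunk above: each segment gets a log-scale level so that segments within one mergeFactor of each other group together as merge candidates. The minimal sketch below is illustrative only (class and method names are made up here) and assumes the norm divisor is Math.log(mergeFactor), which this hunk does not itself show.

import java.util.ArrayList;
import java.util.List;

// Illustrative sketch of the log-scale level assignment: segments whose
// sizes differ by less than one factor of mergeFactor land on roughly the
// same level and are candidates to be merged together.
class LevelSketch {
  static List<Float> levels(long[] segmentSizes, int mergeFactor) {
    final double norm = Math.log(mergeFactor); // assumption: same divisor LogMergePolicy uses
    final List<Float> levels = new ArrayList<Float>();
    for (long size : segmentSizes) {
      if (size < 1) {
        size = 1;                              // floor tiny segments, as in the patch
      }
      levels.add((float) (Math.log(size) / norm));
    }
    return levels;
  }

  public static void main(String[] args) {
    // with mergeFactor=10, sizes 5, 50 and 500 land on levels ~0.7, ~1.7, ~2.7
    System.out.println(levels(new long[] {5, 50, 500}, 10));
  }
}
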
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MergeDocIDRemapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MergeDocIDRemapper.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MergeDocIDRemapper.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MergeDocIDRemapper.java Sat May 14 13:51:35 2011
@@ -32,7 +32,7 @@ final class MergeDocIDRemapper {
public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount) {
this.docMaps = docMaps;
- SegmentInfo firstSegment = merge.segments.info(0);
+ SegmentInfo firstSegment = merge.segments.get(0);
int i = 0;
while(true) {
SegmentInfo info = infos.info(i);
@@ -45,7 +45,7 @@ final class MergeDocIDRemapper {
int numDocs = 0;
for(int j=0;j<docMaps.length;i++,j++) {
numDocs += infos.info(i).docCount;
- assert infos.info(i).equals(merge.segments.info(j));
+ assert infos.info(i).equals(merge.segments.get(j));
}
maxDocID = minDocID + numDocs;
@@ -55,7 +55,7 @@ final class MergeDocIDRemapper {
starts[0] = minDocID;
newStarts[0] = minDocID;
for(i=1;i<docMaps.length;i++) {
- final int lastDocCount = merge.segments.info(i-1).docCount;
+ final int lastDocCount = merge.segments.get(i-1).docCount;
starts[i] = starts[i-1] + lastDocCount;
newStarts[i] = newStarts[i-1] + lastDocCount - delCounts[i-1];
}
@@ -69,7 +69,7 @@ final class MergeDocIDRemapper {
// assert docShift > 0;
// Make sure it all adds up:
- assert docShift == maxDocID - (newStarts[docMaps.length-1] + merge.segments.info(docMaps.length-1).docCount - delCounts[docMaps.length-1]);
+ assert docShift == maxDocID - (newStarts[docMaps.length-1] + merge.segments.get(docMaps.length-1).docCount - delCounts[docMaps.length-1]);
}
public int remap(int oldDocID) {
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MergePolicy.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MergePolicy.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MergePolicy.java Sat May 14 13:51:35 2011
@@ -72,17 +72,24 @@ public abstract class MergePolicy implem
long mergeGen; // used by IndexWriter
boolean isExternal; // used by IndexWriter
int maxNumSegmentsOptimize; // used by IndexWriter
+ long estimatedMergeBytes; // used by IndexWriter
List<SegmentReader> readers; // used by IndexWriter
List<SegmentReader> readerClones; // used by IndexWriter
- public final SegmentInfos segments;
+ public final List<SegmentInfo> segments;
+ public final int totalDocCount;
boolean aborted;
Throwable error;
boolean paused;
- public OneMerge(SegmentInfos segments) {
+ public OneMerge(List<SegmentInfo> segments) {
if (0 == segments.size())
throw new RuntimeException("segments must include at least one segment");
this.segments = segments;
+ int count = 0;
+ for(SegmentInfo info : segments) {
+ count += info.docCount;
+ }
+ totalDocCount = count;
}
/** Record that an exception occurred while executing
@@ -146,7 +153,7 @@ public abstract class MergePolicy implem
final int numSegments = segments.size();
for(int i=0;i<numSegments;i++) {
if (i > 0) b.append(' ');
- b.append(segments.info(i).toString(dir, 0));
+ b.append(segments.get(i).toString(dir, 0));
}
if (info != null)
b.append(" into ").append(info.name);
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/NormsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/NormsWriter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/NormsWriter.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/NormsWriter.java Sat May 14 13:51:35 2011
@@ -19,11 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collection;
-import java.util.Iterator;
-import java.util.HashMap;
import java.util.Map;
-import java.util.List;
-import java.util.ArrayList;
import org.apache.lucene.store.IndexOutput;
@@ -36,10 +32,6 @@ import org.apache.lucene.store.IndexOutp
final class NormsWriter extends InvertedDocEndConsumer {
- @Override
- public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
- return new NormsWriterPerThread(docInverterPerThread, this);
- }
@Override
public void abort() {}
@@ -50,40 +42,11 @@ final class NormsWriter extends Inverted
/** Produce _X.nrm if any document had a field with norms
* not disabled */
@Override
- public void flush(Map<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException {
-
- final Map<FieldInfo,List<NormsWriterPerField>> byField = new HashMap<FieldInfo,List<NormsWriterPerField>>();
-
+ public void flush(Map<FieldInfo,InvertedDocEndConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException {
if (!state.fieldInfos.hasNorms()) {
return;
}
- // Typically, each thread will have encountered the same
- // field. So first we collate by field, ie, all
- // per-thread field instances that correspond to the
- // same FieldInfo
- for (final Map.Entry<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> entry : threadsAndFields.entrySet()) {
- final Collection<InvertedDocEndConsumerPerField> fields = entry.getValue();
- final Iterator<InvertedDocEndConsumerPerField> fieldsIt = fields.iterator();
-
- while (fieldsIt.hasNext()) {
- final NormsWriterPerField perField = (NormsWriterPerField) fieldsIt.next();
-
- if (perField.upto > 0) {
- // It has some norms
- List<NormsWriterPerField> l = byField.get(perField.fieldInfo);
- if (l == null) {
- l = new ArrayList<NormsWriterPerField>();
- byField.put(perField.fieldInfo, l);
- }
- l.add(perField);
- } else
- // Remove this field since we haven't seen it
- // since the previous flush
- fieldsIt.remove();
- }
- }
-
final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.NORMS_EXTENSION);
IndexOutput normsOut = state.directory.createOutput(normsFileName);
@@ -93,60 +56,25 @@ final class NormsWriter extends Inverted
int normCount = 0;
for (FieldInfo fi : state.fieldInfos) {
- final List<NormsWriterPerField> toMerge = byField.get(fi);
+ final NormsWriterPerField toWrite = (NormsWriterPerField) fieldsToFlush.get(fi);
int upto = 0;
- if (toMerge != null) {
-
- final int numFields = toMerge.size();
-
+ if (toWrite != null && toWrite.upto > 0) {
normCount++;
- final NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
- int[] uptos = new int[numFields];
-
- for(int j=0;j<numFields;j++)
- fields[j] = toMerge.get(j);
-
- int numLeft = numFields;
-
- while(numLeft > 0) {
-
- assert uptos[0] < fields[0].docIDs.length : " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.length);
-
- int minLoc = 0;
- int minDocID = fields[0].docIDs[uptos[0]];
-
- for(int j=1;j<numLeft;j++) {
- final int docID = fields[j].docIDs[uptos[j]];
- if (docID < minDocID) {
- minDocID = docID;
- minLoc = j;
- }
- }
-
- assert minDocID < state.numDocs;
-
- // Fill hole
- for(;upto<minDocID;upto++)
+ int docID = 0;
+ for (; docID < state.numDocs; docID++) {
+ if (upto < toWrite.upto && toWrite.docIDs[upto] == docID) {
+ normsOut.writeByte(toWrite.norms[upto]);
+ upto++;
+ } else {
normsOut.writeByte((byte) 0);
-
- normsOut.writeByte(fields[minLoc].norms[uptos[minLoc]]);
- (uptos[minLoc])++;
- upto++;
-
- if (uptos[minLoc] == fields[minLoc].upto) {
- fields[minLoc].reset();
- if (minLoc != numLeft-1) {
- fields[minLoc] = fields[numLeft-1];
- uptos[minLoc] = uptos[numLeft-1];
- }
- numLeft--;
}
}
-
- // Fill final hole with defaultNorm
- for(;upto<state.numDocs;upto++)
- normsOut.writeByte((byte) 0);
+
+ // we should have consumed every norm
+ assert upto == toWrite.upto;
+
+ toWrite.reset();
} else if (fi.isIndexed && !fi.omitNorms) {
normCount++;
// Fill entire field with default norm:
@@ -161,4 +89,16 @@ final class NormsWriter extends Inverted
normsOut.close();
}
}
+
+ @Override
+ void finishDocument() throws IOException {}
+
+ @Override
+ void startDocument() throws IOException {}
+
+ @Override
+ InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField,
+ FieldInfo fieldInfo) {
+ return new NormsWriterPerField(docInverterPerField, fieldInfo);
+ }
}
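
The rewritten flush above no longer collates per-thread fields; it walks each field once, writing the recorded norm for documents that have one and a zero filler byte for those that do not, keeping the .nrm stream dense at one byte per document. A standalone sketch of that fill loop (hypothetical names, plain OutputStream instead of IndexOutput):

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;

// Sketch of the simplified norms flush: docIDs[0..upto) lists the documents
// that actually have a norm; every other document gets a 0 filler byte.
class NormsFillSketch {
  static void writeNorms(int[] docIDs, byte[] norms, int upto, int numDocs, OutputStream out)
      throws IOException {
    int next = 0;
    for (int docID = 0; docID < numDocs; docID++) {
      if (next < upto && docIDs[next] == docID) {
        out.write(norms[next]);   // this doc had a norm recorded
        next++;
      } else {
        out.write(0);             // hole: default norm
      }
    }
    assert next == upto;          // every recorded norm was consumed
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    writeNorms(new int[] {1, 3}, new byte[] {0x7f, 0x70}, 2, 5, out);
    System.out.println(out.size()); // 5 bytes: 0, 0x7f, 0, 0x70, 0
  }
}
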
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java Sat May 14 13:51:35 2011
@@ -27,9 +27,8 @@ import org.apache.lucene.util.ArrayUtil;
final class NormsWriterPerField extends InvertedDocEndConsumerPerField implements Comparable<NormsWriterPerField> {
- final NormsWriterPerThread perThread;
final FieldInfo fieldInfo;
- final DocumentsWriter.DocState docState;
+ final DocumentsWriterPerThread.DocState docState;
final Similarity similarity;
// Holds all docID/norm pairs we've seen
@@ -46,10 +45,9 @@ final class NormsWriterPerField extends
upto = 0;
}
- public NormsWriterPerField(final DocInverterPerField docInverterPerField, final NormsWriterPerThread perThread, final FieldInfo fieldInfo) {
- this.perThread = perThread;
+ public NormsWriterPerField(final DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
- docState = perThread.docState;
+ docState = docInverterPerField.docState;
fieldState = docInverterPerField.fieldState;
similarity = docState.similarityProvider.get(fieldInfo.name);
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Sat May 14 13:51:35 2011
@@ -37,14 +37,15 @@ import org.apache.lucene.util.Constants;
/**
* Information about a segment such as it's name, directory, and files related
* to the segment.
- *
+ *
* @lucene.experimental
*/
public final class SegmentInfo {
-
+ // TODO: remove with hasVector and hasProx
+ private static final int CHECK_FIELDINFO = -2;
static final int NO = -1; // e.g. no norms; no deletes;
static final int YES = 1; // e.g. have norms; have deletes;
- static final int WITHOUT_GEN = 0; // a file name that has no GEN in it.
+ static final int WITHOUT_GEN = 0; // a file name that has no GEN in it.
public String name; // unique name in dir
public int docCount; // number of docs in seg
@@ -56,7 +57,7 @@ public final class SegmentInfo {
* - YES or higher if there are deletes at generation N
*/
private long delGen;
-
+
/*
* Current generation of each field's norm file. If this array is null,
* means no separate norms. If this array is not null, its values mean:
@@ -65,7 +66,7 @@ public final class SegmentInfo {
*/
private Map<Integer,Long> normGen;
- private boolean isCompoundFile;
+ private boolean isCompoundFile;
private volatile List<String> files; // cached list of files that this segment uses
// in the Directory
@@ -73,17 +74,22 @@ public final class SegmentInfo {
private volatile long sizeInBytesNoStore = -1; // total byte size of all but the store files (computed on demand)
private volatile long sizeInBytesWithStore = -1; // total byte size of all of our files (computed on demand)
+ //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
private int docStoreOffset; // if this segment shares stored fields & vectors, this
// offset is where in that file this segment's docs begin
+ //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
private String docStoreSegment; // name used to derive fields/vectors file we share with
// other segments
+ //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
private boolean docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx)
private int delCount; // How many deleted docs in this segment
+
+ //TODO: remove when we don't have to support old indexes anymore that had this field
+ private int hasVectors = CHECK_FIELDINFO;
+ //TODO: remove when we don't have to support old indexes anymore that had this field
+ private int hasProx = CHECK_FIELDINFO; // True if this segment has any fields with omitTermFreqAndPositions==false
- private boolean hasProx; // True if this segment has any fields with omitTermFreqAndPositions==false
-
- private boolean hasVectors; // True if this segment wrote term vectors
private FieldInfos fieldInfos;
@@ -91,9 +97,9 @@ public final class SegmentInfo {
private Map<String,String> diagnostics;
- // Tracks the Lucene version this segment was created with, since 3.1. Null
+ // Tracks the Lucene version this segment was created with, since 3.1. Null
// indicates an older than 3.0 index, and it's used to detect a too old index.
- // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and
+ // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and
// specific versions afterwards ("3.0", "3.1" etc.).
// see Constants.LUCENE_MAIN_VERSION.
private String version;
@@ -102,8 +108,11 @@ public final class SegmentInfo {
// this is never written to/read from the Directory
private long bufferedDeletesGen;
+ // holds the fieldInfos Version to refresh files() cache if FI has changed
+ private long fieldInfosVersion;
+
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile,
- boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors, FieldInfos fieldInfos) {
+ SegmentCodecs segmentCodecs, FieldInfos fieldInfos) {
this.name = name;
this.docCount = docCount;
this.dir = dir;
@@ -111,9 +120,7 @@ public final class SegmentInfo {
this.isCompoundFile = isCompoundFile;
this.docStoreOffset = -1;
this.docStoreSegment = name;
- this.hasProx = hasProx;
this.segmentCodecs = segmentCodecs;
- this.hasVectors = hasVectors;
delCount = 0;
version = Constants.LUCENE_MAIN_VERSION;
this.fieldInfos = fieldInfos;
@@ -182,11 +189,13 @@ public final class SegmentInfo {
docStoreSegment = name;
docStoreIsCompoundFile = false;
}
+
if (format > DefaultSegmentInfosWriter.FORMAT_4_0) {
// pre-4.0 indexes write a byte if there is a single norms file
byte b = input.readByte();
assert 1 == b;
}
+
int numNormGen = input.readInt();
if (numNormGen == NO) {
normGen = null;
@@ -206,8 +215,8 @@ public final class SegmentInfo {
delCount = input.readInt();
assert delCount <= docCount;
- hasProx = input.readByte() == YES;
-
+ hasProx = input.readByte();
+
// System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);
if (format <= DefaultSegmentInfosWriter.FORMAT_4_0) {
segmentCodecs = new SegmentCodecs(codecs, input);
@@ -217,9 +226,9 @@ public final class SegmentInfo {
segmentCodecs = new SegmentCodecs(codecs, new Codec[] { codecs.lookup("PreFlex")});
}
diagnostics = input.readStringStringMap();
-
+
if (format <= DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
- hasVectors = input.readByte() == 1;
+ hasVectors = input.readByte();
} else {
final String storesSegment;
final String ext;
@@ -240,7 +249,7 @@ public final class SegmentInfo {
dirToTest = dir;
}
try {
- hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
+ hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION)) ? YES : NO;
} finally {
if (isCompoundFile) {
dirToTest.close();
@@ -304,14 +313,9 @@ public final class SegmentInfo {
}
public boolean getHasVectors() throws IOException {
- return hasVectors;
- }
-
- public void setHasVectors(boolean v) {
- hasVectors = v;
- clearFilesCache();
+ return hasVectors == CHECK_FIELDINFO ? getFieldInfos().hasVectors() : hasVectors == YES;
}
-
+
public FieldInfos getFieldInfos() throws IOException {
loadFieldInfos(dir, true);
return fieldInfos;
@@ -342,7 +346,7 @@ public final class SegmentInfo {
@Override
public Object clone() {
- final SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, hasProx, segmentCodecs, hasVectors,
+ final SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, segmentCodecs,
fieldInfos == null ? null : (FieldInfos) fieldInfos.clone());
si.docStoreOffset = docStoreOffset;
si.docStoreSegment = docStoreSegment;
@@ -357,6 +361,8 @@ public final class SegmentInfo {
}
}
si.version = version;
+ si.hasProx = hasProx;
+ si.hasVectors = hasVectors;
return si;
}
@@ -366,7 +372,7 @@ public final class SegmentInfo {
// against this segment
return null;
} else {
- return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
+ return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
}
}
@@ -432,7 +438,7 @@ public final class SegmentInfo {
if (hasSeparateNorms(number)) {
return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen.get(number));
} else {
- // single file for all norms
+ // single file for all norms
return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN);
}
}
@@ -465,39 +471,74 @@ public final class SegmentInfo {
assert delCount <= docCount;
}
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
public int getDocStoreOffset() {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
return docStoreOffset;
}
-
+
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
public boolean getDocStoreIsCompoundFile() {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
return docStoreIsCompoundFile;
}
-
- void setDocStoreIsCompoundFile(boolean v) {
- docStoreIsCompoundFile = v;
+
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
+ public void setDocStoreIsCompoundFile(boolean docStoreIsCompoundFile) {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
+ this.docStoreIsCompoundFile = docStoreIsCompoundFile;
clearFilesCache();
}
-
+
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
+ void setDocStore(int offset, String segment, boolean isCompoundFile) {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
+ docStoreOffset = offset;
+ docStoreSegment = segment;
+ docStoreIsCompoundFile = isCompoundFile;
+ clearFilesCache();
+ }
+
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
public String getDocStoreSegment() {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
return docStoreSegment;
}
-
- public void setDocStoreSegment(String segment) {
- docStoreSegment = segment;
- }
-
+
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
void setDocStoreOffset(int offset) {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
docStoreOffset = offset;
clearFilesCache();
}
- void setDocStore(int offset, String segment, boolean isCompoundFile) {
- docStoreOffset = offset;
- docStoreSegment = segment;
- docStoreIsCompoundFile = isCompoundFile;
- clearFilesCache();
+ /**
+ * @deprecated shared doc stores are not supported in 4.0
+ */
+ @Deprecated
+ public void setDocStoreSegment(String docStoreSegment) {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
+ this.docStoreSegment = docStoreSegment;
}
-
+
/** Save this segment's info. */
public void write(IndexOutput output)
throws IOException {
@@ -507,12 +548,14 @@ public final class SegmentInfo {
output.writeString(name);
output.writeInt(docCount);
output.writeLong(delGen);
+
output.writeInt(docStoreOffset);
if (docStoreOffset != -1) {
output.writeString(docStoreSegment);
output.writeByte((byte) (docStoreIsCompoundFile ? 1:0));
}
+
if (normGen == null) {
output.writeInt(NO);
} else {
@@ -522,22 +565,17 @@ public final class SegmentInfo {
output.writeLong(entry.getValue());
}
}
-
+
output.writeByte((byte) (isCompoundFile ? YES : NO));
output.writeInt(delCount);
- output.writeByte((byte) (hasProx ? 1:0));
+ output.writeByte((byte) (hasProx));
segmentCodecs.write(output);
output.writeStringStringMap(diagnostics);
- output.writeByte((byte) (hasVectors ? 1 : 0));
+ output.writeByte((byte) (hasVectors));
}
- void setHasProx(boolean hasProx) {
- this.hasProx = hasProx;
- clearFilesCache();
- }
-
- public boolean getHasProx() {
- return hasProx;
+ public boolean getHasProx() throws IOException {
+ return hasProx == CHECK_FIELDINFO ? getFieldInfos().hasProx() : hasProx == YES;
}
/** Can only be called once. */
@@ -565,14 +603,15 @@ public final class SegmentInfo {
*/
public List<String> files() throws IOException {
-
- if (files != null) {
+ final long fisVersion = fieldInfosVersion;
+ if (fisVersion != (fieldInfosVersion = getFieldInfos().getVersion())) {
+ clearFilesCache(); // FIS has modifications - need to recompute
+ } else if (files != null) {
// Already cached:
return files;
}
-
- Set<String> fileSet = new HashSet<String>();
-
+ final Set<String> fileSet = new HashSet<String>();
+
boolean useCompoundFile = getUseCompoundFile();
if (useCompoundFile) {
@@ -593,7 +632,7 @@ public final class SegmentInfo {
} else {
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.FIELDS_INDEX_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.FIELDS_EXTENSION));
- if (hasVectors) {
+ if (getHasVectors()) {
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
@@ -602,11 +641,11 @@ public final class SegmentInfo {
} else if (!useCompoundFile) {
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.FIELDS_INDEX_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.FIELDS_EXTENSION));
- if (hasVectors) {
+ if (getHasVectors()) {
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
- }
+ }
}
String delFileName = IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
@@ -644,7 +683,7 @@ public final class SegmentInfo {
}
/** Used for debugging. Format may suddenly change.
- *
+ *
* <p>Current format looks like
* <code>_a(3.1):c45/4->_1</code>, which means the segment's
* name is <code>_a</code>; it was created with Lucene 3.1 (or
@@ -659,15 +698,18 @@ public final class SegmentInfo {
StringBuilder s = new StringBuilder();
s.append(name).append('(').append(version == null ? "?" : version).append(')').append(':');
-
char cfs = getUseCompoundFile() ? 'c' : 'C';
s.append(cfs);
if (this.dir != dir) {
s.append('x');
}
- if (hasVectors) {
- s.append('v');
+ try {
+ if (getHasVectors()) {
+ s.append('v');
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
}
s.append(docCount);
@@ -675,7 +717,7 @@ public final class SegmentInfo {
if (delCount != 0) {
s.append('/').append(delCount);
}
-
+
if (docStoreOffset != -1) {
s.append("->").append(docStoreSegment);
if (docStoreIsCompoundFile) {
@@ -715,13 +757,13 @@ public final class SegmentInfo {
* <b>NOTE:</b> this method is used for internal purposes only - you should
* not modify the version of a SegmentInfo, or it may result in unexpected
* exceptions thrown when you attempt to open the index.
- *
+ *
* @lucene.internal
*/
public void setVersion(String version) {
this.version = version;
}
-
+
/** Returns the version of the code which wrote the segment. */
public String getVersion() {
return version;
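
hasProx and hasVectors above become tri-state ints: CHECK_FIELDINFO (-2) means defer to FieldInfos, while YES/NO carry the value read from an older on-disk segment header. A small sketch of that resolution, with a boolean parameter standing in for getFieldInfos().hasVectors():

// Sketch of the tri-state flag introduced above; constants mirror the patch.
class TriStateFlagSketch {
  static final int CHECK_FIELDINFO = -2;
  static final int NO = -1;
  static final int YES = 1;

  // fieldInfosHasVectors stands in for getFieldInfos().hasVectors()
  static boolean resolve(int flag, boolean fieldInfosHasVectors) {
    return flag == CHECK_FIELDINFO ? fieldInfosHasVectors : flag == YES;
  }

  public static void main(String[] args) {
    System.out.println(resolve(CHECK_FIELDINFO, true));  // true  - defer to FieldInfos
    System.out.println(resolve(NO, true));               // false - old header said no vectors
    System.out.println(resolve(YES, false));             // true  - old header said vectors exist
  }
}
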
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Sat May 14 13:51:35 2011
@@ -39,24 +39,24 @@ import org.apache.lucene.util.ReaderUtil
/**
* The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add},
- * into a single Segment. After adding the appropriate readers, call the merge method to combine the
+ * into a single Segment. After adding the appropriate readers, call the merge method to combine the
* segments.
- *
+ *
* @see #merge
* @see #add
*/
final class SegmentMerger {
-
+
/** norms header placeholder */
- static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
-
+ static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
+
private Directory directory;
private String segment;
private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;
private List<IndexReader> readers = new ArrayList<IndexReader>();
private final FieldInfos fieldInfos;
-
+
private int mergedDocs;
private final MergeState.CheckAbort checkAbort;
@@ -64,17 +64,15 @@ final class SegmentMerger {
/** Maximum number of contiguous documents to bulk-copy
when merging stored fields */
private final static int MAX_RAW_MERGE_DOCS = 4192;
-
- private final CodecProvider codecs;
+
private Codec codec;
private SegmentWriteState segmentWriteState;
private PayloadProcessorProvider payloadProcessorProvider;
-
- SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, CodecProvider codecs, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
+
+ SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
this.payloadProcessorProvider = payloadProcessorProvider;
directory = dir;
- this.codecs = codecs;
segment = name;
this.fieldInfos = fieldInfos;
if (merge != null) {
@@ -135,10 +133,10 @@ final class SegmentMerger {
for (String file : files) {
cfsWriter.addFile(file);
}
-
+
// Perform the merge
cfsWriter.close();
-
+
return files;
}
@@ -196,13 +194,12 @@ final class SegmentMerger {
}
/**
- *
+ *
* @return The number of documents in all of the readers
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
private int mergeFields() throws CorruptIndexException, IOException {
-
for (IndexReader reader : readers) {
if (reader instanceof SegmentReader) {
SegmentReader segmentReader = (SegmentReader) reader;
@@ -265,7 +262,7 @@ final class SegmentMerger {
throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.fileExists(fileName) + "; now aborting this merge to prevent index corruption");
segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, docCount, termIndexInterval, codecInfo, null);
-
+
return docCount;
}
@@ -283,7 +280,7 @@ final class SegmentMerger {
++j;
continue;
}
- // We can optimize this case (doing a bulk byte copy) since the field
+ // We can optimize this case (doing a bulk byte copy) since the field
// numbers are identical
int start = j, numDocs = 0;
do {
@@ -295,7 +292,7 @@ final class SegmentMerger {
break;
}
} while(numDocs < MAX_RAW_MERGE_DOCS);
-
+
IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, start, numDocs);
fieldsWriter.addRawDocuments(stream, rawDocLengths, numDocs);
docCount += numDocs;
@@ -349,7 +346,7 @@ final class SegmentMerger {
* @throws IOException
*/
private final void mergeVectors() throws IOException {
- TermVectorsWriter termVectorsWriter =
+ TermVectorsWriter termVectorsWriter =
new TermVectorsWriter(directory, segment, fieldInfos);
try {
@@ -369,7 +366,7 @@ final class SegmentMerger {
copyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader);
} else {
copyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader);
-
+
}
}
} finally {
@@ -402,7 +399,7 @@ final class SegmentMerger {
++docNum;
continue;
}
- // We can optimize this case (doing a bulk byte copy) since the field
+ // We can optimize this case (doing a bulk byte copy) since the field
// numbers are identical
int start = docNum, numDocs = 0;
do {
@@ -414,7 +411,7 @@ final class SegmentMerger {
break;
}
} while(numDocs < MAX_RAW_MERGE_DOCS);
-
+
matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
termVectorsWriter.addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
checkAbort.work(300 * numDocs);
@@ -425,7 +422,7 @@ final class SegmentMerger {
// skip deleted docs
continue;
}
-
+
// NOTE: it's very important to first assign to vectors then pass it to
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
TermFreqVector[] vectors = reader.getTermFreqVectors(docNum);
@@ -434,7 +431,7 @@ final class SegmentMerger {
}
}
}
-
+
private void copyVectorsNoDeletions(final TermVectorsWriter termVectorsWriter,
final TermVectorsReader matchingVectorsReader,
final IndexReader reader)
@@ -470,7 +467,7 @@ final class SegmentMerger {
// Let CodecProvider decide which codec will be used to write
// the new segment:
-
+
int docBase = 0;
final List<Fields> fields = new ArrayList<Fields>();
@@ -498,7 +495,7 @@ final class SegmentMerger {
mergeState.readerCount = readers.size();
mergeState.fieldInfos = fieldInfos;
mergeState.mergedDocCount = mergedDocs;
-
+
// Remap docIDs
mergeState.delCounts = new int[mergeState.readerCount];
mergeState.docMaps = new int[mergeState.readerCount][];
@@ -536,7 +533,7 @@ final class SegmentMerger {
}
assert delCount == mergeState.delCounts[i]: "reader delCount=" + mergeState.delCounts[i] + " vs recomputed delCount=" + delCount;
}
-
+
if (payloadProcessorProvider != null) {
mergeState.dirPayloadProcessor[i] = payloadProcessorProvider.getDirProcessor(reader.directory());
}
@@ -549,7 +546,7 @@ final class SegmentMerger {
// apart when we step through the docs enums in
// MultiDocsEnum.
mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts);
-
+
try {
consumer.merge(mergeState,
new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
@@ -568,7 +565,7 @@ final class SegmentMerger {
int[] getDelCounts() {
return mergeState.delCounts;
}
-
+
public boolean getAnyNonBulkMerges() {
assert matchedCount <= readers.size();
return matchedCount != readers.size();
@@ -579,7 +576,7 @@ final class SegmentMerger {
try {
for (FieldInfo fi : fieldInfos) {
if (fi.isIndexed && !fi.omitNorms) {
- if (output == null) {
+ if (output == null) {
output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
output.writeBytes(NORMS_HEADER,NORMS_HEADER.length);
}
@@ -610,7 +607,7 @@ final class SegmentMerger {
}
}
} finally {
- if (output != null) {
+ if (output != null) {
output.close();
}
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentReader.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentReader.java Sat May 14 13:51:35 2011
@@ -55,6 +55,9 @@ public class SegmentReader extends Index
AtomicInteger deletedDocsRef = null;
private boolean deletedDocsDirty = false;
private boolean normsDirty = false;
+
+ // TODO: we should move this tracking into SegmentInfo;
+ // this way SegmentInfo.toString shows pending deletes
private int pendingDeleteCount;
private boolean rollbackHasChanges = false;
@@ -810,8 +813,9 @@ public class SegmentReader extends Index
oldRef.decrementAndGet();
}
deletedDocsDirty = true;
- if (!deletedDocs.getAndSet(docNum))
+ if (!deletedDocs.getAndSet(docNum)) {
pendingDeleteCount++;
+ }
}
@Override
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java Sat May 14 13:51:35 2011
@@ -31,7 +31,6 @@ public class SegmentWriteState {
public final String segmentName;
public final FieldInfos fieldInfos;
public final int numDocs;
- public boolean hasVectors;
// Deletes to apply while we are flushing the segment. A
// Term is enrolled in here if it was deleted at one
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java Sat May 14 13:51:35 2011
@@ -18,7 +18,8 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import org.apache.lucene.store.RAMOutputStream;
+
+import org.apache.lucene.document.Fieldable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
@@ -26,22 +27,38 @@ import org.apache.lucene.util.RamUsageEs
final class StoredFieldsWriter {
FieldsWriter fieldsWriter;
- final DocumentsWriter docWriter;
+ final DocumentsWriterPerThread docWriter;
int lastDocID;
- PerDoc[] docFreeList = new PerDoc[1];
int freeCount;
- public StoredFieldsWriter(DocumentsWriter docWriter) {
+ final DocumentsWriterPerThread.DocState docState;
+
+ public StoredFieldsWriter(DocumentsWriterPerThread docWriter) {
this.docWriter = docWriter;
+ this.docState = docWriter.docState;
+ }
+
+ private int numStoredFields;
+ private Fieldable[] storedFields;
+ private int[] fieldNumbers;
+
+ public void reset() {
+ numStoredFields = 0;
+ storedFields = new Fieldable[1];
+ fieldNumbers = new int[1];
}
- public StoredFieldsWriterPerThread addThread(DocumentsWriter.DocState docState) throws IOException {
- return new StoredFieldsWriterPerThread(docState, this);
+ public void startDocument() {
+ reset();
}
- synchronized public void flush(SegmentWriteState state) throws IOException {
- if (state.numDocs > lastDocID) {
+ public void flush(SegmentWriteState state) throws IOException {
+
+ if (state.numDocs > 0) {
+ // It's possible that all documents seen in this segment
+ // hit non-aborting exceptions, in which case we will
+ // not have yet init'd the FieldsWriter:
initFieldsWriter();
fill(state.numDocs);
}
@@ -67,23 +84,9 @@ final class StoredFieldsWriter {
int allocCount;
- synchronized PerDoc getPerDoc() {
- if (freeCount == 0) {
- allocCount++;
- if (allocCount > docFreeList.length) {
- // Grow our free list up front to make sure we have
- // enough space to recycle all outstanding PerDoc
- // instances
- assert allocCount == 1+docFreeList.length;
- docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
- }
- return new PerDoc();
- } else {
- return docFreeList[--freeCount];
- }
- }
+ void abort() {
+ reset();
- synchronized void abort() {
if (fieldsWriter != null) {
fieldsWriter.abort();
fieldsWriter = null;
@@ -101,53 +104,40 @@ final class StoredFieldsWriter {
}
}
- synchronized void finishDocument(PerDoc perDoc) throws IOException {
+ void finishDocument() throws IOException {
assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument start");
+
initFieldsWriter();
+ fill(docState.docID);
- fill(perDoc.docID);
+ if (fieldsWriter != null && numStoredFields > 0) {
+ fieldsWriter.startDocument(numStoredFields);
+ for (int i = 0; i < numStoredFields; i++) {
+ fieldsWriter.writeField(fieldNumbers[i], storedFields[i]);
+ }
+ lastDocID++;
+ }
- // Append stored fields to the real FieldsWriter:
- fieldsWriter.flushDocument(perDoc.numStoredFields, perDoc.fdt);
- lastDocID++;
- perDoc.reset();
- free(perDoc);
+ reset();
assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument end");
}
- synchronized void free(PerDoc perDoc) {
- assert freeCount < docFreeList.length;
- assert 0 == perDoc.numStoredFields;
- assert 0 == perDoc.fdt.length();
- assert 0 == perDoc.fdt.getFilePointer();
- docFreeList[freeCount++] = perDoc;
- }
-
- class PerDoc extends DocumentsWriter.DocWriter {
- final DocumentsWriter.PerDocBuffer buffer = docWriter.newPerDocBuffer();
- RAMOutputStream fdt = new RAMOutputStream(buffer);
- int numStoredFields;
-
- void reset() {
- fdt.reset();
- buffer.recycle();
- numStoredFields = 0;
+ public void addField(Fieldable field, FieldInfo fieldInfo) throws IOException {
+ if (numStoredFields == storedFields.length) {
+ int newSize = ArrayUtil.oversize(numStoredFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
+ Fieldable[] newArray = new Fieldable[newSize];
+ System.arraycopy(storedFields, 0, newArray, 0, numStoredFields);
+ storedFields = newArray;
}
- @Override
- void abort() {
- reset();
- free(this);
+ if (numStoredFields == fieldNumbers.length) {
+ fieldNumbers = ArrayUtil.grow(fieldNumbers);
}
- @Override
- public long sizeInBytes() {
- return buffer.getSizeInBytes();
- }
+ storedFields[numStoredFields] = field;
+ fieldNumbers[numStoredFields] = fieldInfo.number;
+ numStoredFields++;
- @Override
- public void finish() throws IOException {
- finishDocument(this);
- }
+ assert docState.testPoint("StoredFieldsWriterPerThread.processFields.writeField");
}
}
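
The new addField above buffers each document's stored fields in parallel arrays that grow on demand (via ArrayUtil.oversize and ArrayUtil.grow in the patch). The sketch below shows the same pattern in isolation, substituting a simple 1.5x growth helper so it compiles without Lucene on the classpath:

// Sketch of the parallel-array buffering used by the patched addField: one
// array holds the field values, another the matching field numbers; both
// grow on demand and are reset per document.
class ParallelBufferSketch {
  private Object[] storedFields = new Object[1];
  private int[] fieldNumbers = new int[1];
  private int numStoredFields;

  void add(Object field, int fieldNumber) {
    if (numStoredFields == storedFields.length) {
      Object[] newArray = new Object[grow(storedFields.length)];
      System.arraycopy(storedFields, 0, newArray, 0, numStoredFields);
      storedFields = newArray;
    }
    if (numStoredFields == fieldNumbers.length) {
      int[] newArray = new int[grow(fieldNumbers.length)];
      System.arraycopy(fieldNumbers, 0, newArray, 0, numStoredFields);
      fieldNumbers = newArray;
    }
    storedFields[numStoredFields] = field;
    fieldNumbers[numStoredFields] = fieldNumber;
    numStoredFields++;
  }

  private static int grow(int current) {
    return Math.max(current + 1, current + (current >> 1)); // ~1.5x, at least +1
  }

  public static void main(String[] args) {
    ParallelBufferSketch buf = new ParallelBufferSketch();
    for (int i = 0; i < 5; i++) {
      buf.add("field" + i, i);
    }
    System.out.println(buf.numStoredFields); // 5
  }
}
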
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java Sat May 14 13:51:35 2011
@@ -17,84 +17,59 @@ package org.apache.lucene.index;
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.Map;
+
import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
-import java.io.IOException;
-import java.util.Collection;
-
-import java.util.Map;
-
final class TermVectorsTermsWriter extends TermsHashConsumer {
- final DocumentsWriter docWriter;
- PerDoc[] docFreeList = new PerDoc[1];
+ final DocumentsWriterPerThread docWriter;
int freeCount;
IndexOutput tvx;
IndexOutput tvd;
IndexOutput tvf;
int lastDocID;
+
+ final DocumentsWriterPerThread.DocState docState;
+ final BytesRef flushTerm = new BytesRef();
+
+ // Used by perField when serializing the term vectors
+ final ByteSliceReader vectorSliceReader = new ByteSliceReader();
boolean hasVectors;
- public TermVectorsTermsWriter(DocumentsWriter docWriter) {
+ public TermVectorsTermsWriter(DocumentsWriterPerThread docWriter) {
this.docWriter = docWriter;
+ docState = docWriter.docState;
}
@Override
- public TermsHashConsumerPerThread addThread(TermsHashPerThread termsHashPerThread) {
- return new TermVectorsTermsWriterPerThread(termsHashPerThread, this);
- }
-
- @Override
- synchronized void flush(Map<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> threadsAndFields, final SegmentWriteState state) throws IOException {
+ void flush(Map<FieldInfo, TermsHashConsumerPerField> fieldsToFlush, final SegmentWriteState state) throws IOException {
if (tvx != null) {
// At least one doc in this run had term vectors enabled
fill(state.numDocs);
+ assert state.segmentName != null;
+ String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
tvx.close();
tvf.close();
tvd.close();
tvx = tvd = tvf = null;
- assert state.segmentName != null;
- String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
- if (4 + ((long) state.numDocs) * 16 != state.directory.fileLength(idxName)) {
+ if (4+((long) state.numDocs)*16 != state.directory.fileLength(idxName)) {
throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName));
}
lastDocID = 0;
- state.hasVectors = hasVectors;
hasVectors = false;
}
- for (Map.Entry<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> entry : threadsAndFields.entrySet()) {
- for (final TermsHashConsumerPerField field : entry.getValue() ) {
- TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field;
- perField.termsHashPerField.reset();
- perField.shrinkHash();
- }
-
- TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.getKey();
- perThread.termsHashPerThread.reset(true);
- }
- }
-
- int allocCount;
-
- synchronized PerDoc getPerDoc() {
- if (freeCount == 0) {
- allocCount++;
- if (allocCount > docFreeList.length) {
- // Grow our free list up front to make sure we have
- // enough space to recycle all outstanding PerDoc
- // instances
- assert allocCount == 1+docFreeList.length;
- docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
- }
- return new PerDoc();
- } else {
- return docFreeList[--freeCount];
+ for (final TermsHashConsumerPerField field : fieldsToFlush.values() ) {
+ TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field;
+ perField.termsHashPerField.reset();
+ perField.shrinkHash();
}
}
@@ -112,18 +87,17 @@ final class TermVectorsTermsWriter exten
}
}
- synchronized void initTermVectorsWriter() throws IOException {
+ private final void initTermVectorsWriter() throws IOException {
if (tvx == null) {
// If we hit an exception while init'ing the term
// vector output files, we must abort this segment
// because those files will be in an unknown
// state:
- hasVectors = true;
tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
-
+
tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
@@ -132,39 +106,45 @@ final class TermVectorsTermsWriter exten
}
}
- synchronized void finishDocument(PerDoc perDoc) throws IOException {
+ @Override
+ void finishDocument(TermsHash termsHash) throws IOException {
assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument start");
+ if (!hasVectors) {
+ return;
+ }
+
initTermVectorsWriter();
- fill(perDoc.docID);
+ fill(docState.docID);
// Append term vectors to the real outputs:
tvx.writeLong(tvd.getFilePointer());
tvx.writeLong(tvf.getFilePointer());
- tvd.writeVInt(perDoc.numVectorFields);
- if (perDoc.numVectorFields > 0) {
- for(int i=0;i<perDoc.numVectorFields;i++) {
- tvd.writeVInt(perDoc.fieldNumbers[i]);
- }
- assert 0 == perDoc.fieldPointers[0];
- long lastPos = perDoc.fieldPointers[0];
- for(int i=1;i<perDoc.numVectorFields;i++) {
- long pos = perDoc.fieldPointers[i];
+ tvd.writeVInt(numVectorFields);
+ if (numVectorFields > 0) {
+ for(int i=0;i<numVectorFields;i++) {
+ tvd.writeVInt(perFields[i].fieldInfo.number);
+ }
+ long lastPos = tvf.getFilePointer();
+ perFields[0].finishDocument();
+ for(int i=1;i<numVectorFields;i++) {
+ long pos = tvf.getFilePointer();
tvd.writeVLong(pos-lastPos);
lastPos = pos;
+ perFields[i].finishDocument();
+ // commit the termVectors once successful success - FI will otherwise reset them
+ perFields[i].fieldInfo.commitVectors();
}
- perDoc.perDocTvf.writeTo(tvf);
- perDoc.numVectorFields = 0;
}
- assert lastDocID == perDoc.docID;
+ assert lastDocID == docState.docID;
lastDocID++;
- perDoc.reset();
- free(perDoc);
+ termsHash.reset();
+ reset();
assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument end");
}
@@ -189,55 +169,58 @@ final class TermVectorsTermsWriter exten
}
tvx = tvd = tvf = null;
lastDocID = 0;
- }
- synchronized void free(PerDoc doc) {
- assert freeCount < docFreeList.length;
- docFreeList[freeCount++] = doc;
+ reset();
}
- class PerDoc extends DocumentsWriter.DocWriter {
+ int numVectorFields;
- final DocumentsWriter.PerDocBuffer buffer = docWriter.newPerDocBuffer();
- RAMOutputStream perDocTvf = new RAMOutputStream(buffer);
+ TermVectorsTermsWriterPerField[] perFields;
- int numVectorFields;
+ void reset() {
+ numVectorFields = 0;
+ perFields = new TermVectorsTermsWriterPerField[1];
+ }
- int[] fieldNumbers = new int[1];
- long[] fieldPointers = new long[1];
+ @Override
+ public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) {
+ return new TermVectorsTermsWriterPerField(termsHashPerField, this, fieldInfo);
+ }
- void reset() {
- perDocTvf.reset();
- buffer.recycle();
- numVectorFields = 0;
+ void addFieldToFlush(TermVectorsTermsWriterPerField fieldToFlush) {
+ if (numVectorFields == perFields.length) {
+ int newSize = ArrayUtil.oversize(numVectorFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
+ TermVectorsTermsWriterPerField[] newArray = new TermVectorsTermsWriterPerField[newSize];
+ System.arraycopy(perFields, 0, newArray, 0, numVectorFields);
+ perFields = newArray;
}
- @Override
- void abort() {
- reset();
- free(this);
- }
+ perFields[numVectorFields++] = fieldToFlush;
+ }
- void addField(final int fieldNumber) {
- if (numVectorFields == fieldNumbers.length) {
- fieldNumbers = ArrayUtil.grow(fieldNumbers);
- }
- if (numVectorFields == fieldPointers.length) {
- fieldPointers = ArrayUtil.grow(fieldPointers);
- }
- fieldNumbers[numVectorFields] = fieldNumber;
- fieldPointers[numVectorFields] = perDocTvf.getFilePointer();
- numVectorFields++;
- }
+ @Override
+ void startDocument() throws IOException {
+ assert clearLastVectorFieldName();
+ reset();
+ }
- @Override
- public long sizeInBytes() {
- return buffer.getSizeInBytes();
- }
+ // Called only by assert
+ final boolean clearLastVectorFieldName() {
+ lastVectorFieldName = null;
+ return true;
+ }
- @Override
- public void finish() throws IOException {
- finishDocument(this);
+ // Called only by assert
+ String lastVectorFieldName;
+ final boolean vectorFieldsInOrder(FieldInfo fi) {
+ try {
+ if (lastVectorFieldName != null)
+ return lastVectorFieldName.compareTo(fi.name) < 0;
+ else
+ return true;
+ } finally {
+ lastVectorFieldName = fi.name;
}
}
+
}
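
The size check kept above (4 + numDocs*16 bytes) matches what this class writes to tvx: a 4-byte format int up front, then two 8-byte file pointers (into tvd and tvf) per document. The trivial sketch below just spells out that arithmetic:

// Sketch of the tvx size invariant asserted in the flush above: a 4-byte
// header plus two longs (tvd and tvf pointers) per document.
class TvxSizeSketch {
  static long expectedTvxLength(int numDocs) {
    return 4 + ((long) numDocs) * 16;
  }

  public static void main(String[] args) {
    System.out.println(expectedTvxLength(0));    // 4
    System.out.println(expectedTvxLength(1000)); // 16004
  }
}
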
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java Sat May 14 13:51:35 2011
@@ -28,11 +28,10 @@ import org.apache.lucene.util.RamUsageEs
final class TermVectorsTermsWriterPerField extends TermsHashConsumerPerField {
- final TermVectorsTermsWriterPerThread perThread;
final TermsHashPerField termsHashPerField;
final TermVectorsTermsWriter termsWriter;
final FieldInfo fieldInfo;
- final DocumentsWriter.DocState docState;
+ final DocumentsWriterPerThread.DocState docState;
final FieldInvertState fieldState;
boolean doVectors;
@@ -41,11 +40,10 @@ final class TermVectorsTermsWriterPerFie
int maxNumPostings;
OffsetAttribute offsetAttribute = null;
-
- public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriterPerThread perThread, FieldInfo fieldInfo) {
+
+ public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriter termsWriter, FieldInfo fieldInfo) {
this.termsHashPerField = termsHashPerField;
- this.perThread = perThread;
- this.termsWriter = perThread.termsWriter;
+ this.termsWriter = termsWriter;
this.fieldInfo = fieldInfo;
docState = termsHashPerField.docState;
fieldState = termsHashPerField.fieldState;
@@ -72,22 +70,12 @@ final class TermVectorsTermsWriterPerFie
}
if (doVectors) {
- if (perThread.doc == null) {
- perThread.doc = termsWriter.getPerDoc();
- perThread.doc.docID = docState.docID;
- assert perThread.doc.numVectorFields == 0;
- assert 0 == perThread.doc.perDocTvf.length();
- assert 0 == perThread.doc.perDocTvf.getFilePointer();
- }
-
- assert perThread.doc.docID == docState.docID;
-
+ termsWriter.hasVectors = true;
if (termsHashPerField.bytesHash.size() != 0) {
// Only necessary if previous doc hit a
// non-aborting exception while writing vectors in
// this field:
termsHashPerField.reset();
- perThread.termsHashPerThread.reset(false);
}
}
@@ -95,42 +83,42 @@ final class TermVectorsTermsWriterPerFie
//perThread.postingsCount = 0;
return doVectors;
- }
+ }
public void abort() {}
/** Called once per field per document if term vectors
* are enabled, to write the vectors to
* RAMOutputStream, which is then quickly flushed to
* the real term vectors files in the Directory. */
@Override
void finish() throws IOException {
+ if (!doVectors || termsHashPerField.bytesHash.size() == 0)
+ return;
+ termsWriter.addFieldToFlush(this);
+ }
+
+ void finishDocument() throws IOException {
assert docState.testPoint("TermVectorsTermsWriterPerField.finish start");
final int numPostings = termsHashPerField.bytesHash.size();
- final BytesRef flushTerm = perThread.flushTerm;
+ final BytesRef flushTerm = termsWriter.flushTerm;
assert numPostings >= 0;
- if (!doVectors || numPostings == 0)
- return;
-
if (numPostings > maxNumPostings)
maxNumPostings = numPostings;
- final IndexOutput tvf = perThread.doc.perDocTvf;
-
// This is called once, after inverting all occurrences
// of a given field in the doc. At this point we flush
// our hash into the DocWriter.
assert fieldInfo.storeTermVector;
- assert perThread.vectorFieldsInOrder(fieldInfo);
+ assert termsWriter.vectorFieldsInOrder(fieldInfo);
- perThread.doc.addField(termsHashPerField.fieldInfo.number);
TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;
+ final IndexOutput tvf = termsWriter.tvf;
// TODO: we may want to make this sort in same order
// as Codec's terms dict?
@@ -140,21 +128,21 @@ final class TermVectorsTermsWriterPerFie
byte bits = 0x0;
if (doVectorPositions)
bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
- if (doVectorOffsets)
+ if (doVectorOffsets)
bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
tvf.writeByte(bits);
int lastLen = 0;
byte[] lastBytes = null;
int lastStart = 0;
-
- final ByteSliceReader reader = perThread.vectorSliceReader;
- final ByteBlockPool termBytePool = perThread.termsHashPerThread.termBytePool;
+
+ final ByteSliceReader reader = termsWriter.vectorSliceReader;
+ final ByteBlockPool termBytePool = termsHashPerField.termBytePool;
for(int j=0;j<numPostings;j++) {
final int termID = termIDs[j];
final int freq = postings.freqs[termID];
-
+
// Get BytesRef
termBytePool.setBytesRef(flushTerm, postings.textStarts[termID]);
@@ -192,20 +180,13 @@ final class TermVectorsTermsWriterPerFie
}
termsHashPerField.reset();
-
- // NOTE: we clear, per-field, at the thread level,
- // because term vectors fully write themselves on each
- // field; this saves RAM (eg if large doc has two large
- // fields w/ term vectors on) because we recycle/reuse
- // all RAM after each field:
- perThread.termsHashPerThread.reset(false);
}
void shrinkHash() {
termsHashPerField.shrinkHash(maxNumPostings);
maxNumPostings = 0;
}
-
+
@Override
void start(Fieldable f) {
if (doVectorOffsets) {
@@ -225,7 +206,7 @@ final class TermVectorsTermsWriterPerFie
if (doVectorOffsets) {
int startOffset = fieldState.offset + offsetAttribute.startOffset();
int endOffset = fieldState.offset + offsetAttribute.endOffset();
-
+
termsHashPerField.writeVInt(1, startOffset);
termsHashPerField.writeVInt(1, endOffset - startOffset);
postings.lastOffsets[termID] = endOffset;
@@ -243,13 +224,13 @@ final class TermVectorsTermsWriterPerFie
assert docState.testPoint("TermVectorsTermsWriterPerField.addTerm start");
TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;
-
+
postings.freqs[termID]++;
if (doVectorOffsets) {
int startOffset = fieldState.offset + offsetAttribute.startOffset();
int endOffset = fieldState.offset + offsetAttribute.endOffset();
-
+
termsHashPerField.writeVInt(1, startOffset - postings.lastOffsets[termID]);
termsHashPerField.writeVInt(1, endOffset - startOffset);
postings.lastOffsets[termID] = endOffset;
@@ -280,7 +261,7 @@ final class TermVectorsTermsWriterPerFie
int[] freqs; // How many times this term occurred in the current doc
int[] lastOffsets; // Last offset we saw
int[] lastPositions; // Last position where this term occurred
-
+
@Override
ParallelPostingsArray newInstance(int size) {
return new TermVectorsPostingsArray(size);
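
The per-field changes above split the old finish() into two steps: finish() now only registers the field via addFieldToFlush, and the actual tvf writes happen in the new finishDocument(). A hedged sketch of that queue-then-flush shape, with illustrative names rather than Lucene's API:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

// Placeholder consumer: the per-field hook only queues the field, the
// per-document hook writes every queued field at once.
final class QueueingVectorConsumer {
  private final List<String> pendingFields = new ArrayList<String>();

  // Called once per field that has term vectors enabled.
  void finishField(String fieldName) {
    pendingFields.add(fieldName);
  }

  // Called once per document; flushes every queued field in order.
  void finishDocument(Appendable tvf) throws IOException {
    for (String field : pendingFields) {
      tvf.append(field).append('\n');
    }
    pendingFields.clear();
  }
}

Deferring the writes to the document boundary is what lets the per-document RAMOutputStream buffering from the old code be dropped: with a single DocumentsWriterPerThread owning tvf there is no interleaving between documents.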
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java Sat May 14 13:51:35 2011
@@ -20,12 +20,13 @@ package org.apache.lucene.index;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import java.io.IOException;
final class TermVectorsWriter {
-
+
private IndexOutput tvx = null, tvd = null, tvf = null;
private FieldInfos fieldInfos;
@@ -46,7 +47,7 @@ final class TermVectorsWriter {
/**
* Add a complete document specified by all its term vectors. If document has no
* term vectors, add value for tvx.
- *
+ *
* @param vectors
* @throws IOException
*/
@@ -99,7 +100,7 @@ final class TermVectorsWriter {
final int[] freqs = vectors[i].getTermFrequencies();
for (int j=0; j<numTerms; j++) {
-
+
int start = j == 0 ? 0 : StringHelper.bytesDifference(terms[j-1].bytes,
terms[j-1].length,
terms[j].bytes,
@@ -181,30 +182,11 @@ final class TermVectorsWriter {
assert tvd.getFilePointer() == tvdPosition;
assert tvf.getFilePointer() == tvfPosition;
}
-
+
/** Close all streams. */
final void close() throws IOException {
// make an effort to close all streams we can but remember and re-throw
// the first exception encountered in this process
- IOException keep = null;
- if (tvx != null)
- try {
- tvx.close();
- } catch (IOException e) {
- keep = e;
- }
- if (tvd != null)
- try {
- tvd.close();
- } catch (IOException e) {
- if (keep == null) keep = e;
- }
- if (tvf != null)
- try {
- tvf.close();
- } catch (IOException e) {
- if (keep == null) keep = e;
- }
- if (keep != null) throw (IOException) keep.fillInStackTrace();
+ IOUtils.closeSafely(tvx, tvd, tvf);
}
}
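
The close() rewrite above replaces the hand-rolled "remember the first IOException" chain with a single IOUtils.closeSafely call. For readers unfamiliar with that utility, a minimal sketch of the equivalent behaviour follows; it is not the actual org.apache.lucene.util.IOUtils code, just the same close-all-then-rethrow-first pattern.

import java.io.Closeable;
import java.io.IOException;

// Close every stream, remember the first IOException, rethrow it at the
// end. Mirrors the deleted logic above; not Lucene's IOUtils.
final class CloseHelper {
  static void closeSafely(Closeable... streams) throws IOException {
    IOException first = null;
    for (Closeable c : streams) {
      if (c == null) {
        continue;
      }
      try {
        c.close();
      } catch (IOException e) {
        if (first == null) {
          first = e;
        }
      }
    }
    if (first != null) {
      throw first;
    }
  }

  private CloseHelper() {}
}

Null arguments are tolerated so callers do not have to guard each stream individually.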
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermsHash.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermsHash.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermsHash.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermsHash.java Sat May 14 13:51:35 2011
@@ -18,12 +18,12 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Collection;
import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
import java.util.Map;
+import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
+
/** This class implements {@link InvertedDocConsumer}, which
* is passed each token produced by the analyzer on each
* field. It stores these tokens in a hash table, and
@@ -36,78 +36,118 @@ final class TermsHash extends InvertedDo
final TermsHashConsumer consumer;
final TermsHash nextTermsHash;
- final DocumentsWriter docWriter;
+ final DocumentsWriterPerThread docWriter;
+
+ final IntBlockPool intPool;
+ final ByteBlockPool bytePool;
+ ByteBlockPool termBytePool;
+
+ final boolean primary;
+ final DocumentsWriterPerThread.DocState docState;
+
+ // Used when comparing postings via termRefComp, in TermsHashPerField
+ final BytesRef tr1 = new BytesRef();
+ final BytesRef tr2 = new BytesRef();
- boolean trackAllocations;
+ // Used by perField to obtain terms from the analysis chain
+ final BytesRef termBytesRef = new BytesRef(10);
- public TermsHash(final DocumentsWriter docWriter, boolean trackAllocations, final TermsHashConsumer consumer, final TermsHash nextTermsHash) {
+ final boolean trackAllocations;
+
+
+ public TermsHash(final DocumentsWriterPerThread docWriter, final TermsHashConsumer consumer, boolean trackAllocations, final TermsHash nextTermsHash) {
+ this.docState = docWriter.docState;
this.docWriter = docWriter;
this.consumer = consumer;
+ this.trackAllocations = trackAllocations;
this.nextTermsHash = nextTermsHash;
- this.trackAllocations = trackAllocations;
+ intPool = new IntBlockPool(docWriter);
+ bytePool = new ByteBlockPool(docWriter.byteBlockAllocator);
+
+ if (nextTermsHash != null) {
+ // We are primary
+ primary = true;
+ termBytePool = bytePool;
+ nextTermsHash.termBytePool = bytePool;
+ } else {
+ primary = false;
+ }
}
@Override
- InvertedDocConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
- return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, null);
+ public void abort() {
+ reset();
+ try {
+ consumer.abort();
+ } finally {
+ if (nextTermsHash != null) {
+ nextTermsHash.abort();
+ }
+ }
}
- TermsHashPerThread addThread(DocInverterPerThread docInverterPerThread, TermsHashPerThread primaryPerThread) {
- return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, primaryPerThread);
- }
+ // Clear all state
+ void reset() {
+ intPool.reset();
+ bytePool.reset();
- @Override
- public void abort() {
- consumer.abort();
- if (nextTermsHash != null)
- nextTermsHash.abort();
+ if (primary) {
+ bytePool.reset();
+ }
}
@Override
- synchronized void flush(Map<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>> threadsAndFields, final SegmentWriteState state) throws IOException {
- Map<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> childThreadsAndFields = new HashMap<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>>();
- Map<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>> nextThreadsAndFields;
-
- if (nextTermsHash != null)
- nextThreadsAndFields = new HashMap<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>>();
- else
- nextThreadsAndFields = null;
+ void flush(Map<FieldInfo,InvertedDocConsumerPerField> fieldsToFlush, final SegmentWriteState state) throws IOException {
+ Map<FieldInfo,TermsHashConsumerPerField> childFields = new HashMap<FieldInfo,TermsHashConsumerPerField>();
+ Map<FieldInfo,InvertedDocConsumerPerField> nextChildFields;
+
+ if (nextTermsHash != null) {
+ nextChildFields = new HashMap<FieldInfo,InvertedDocConsumerPerField>();
+ } else {
+ nextChildFields = null;
+ }
- for (final Map.Entry<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>> entry : threadsAndFields.entrySet()) {
+ for (final Map.Entry<FieldInfo,InvertedDocConsumerPerField> entry : fieldsToFlush.entrySet()) {
+ TermsHashPerField perField = (TermsHashPerField) entry.getValue();
+ childFields.put(entry.getKey(), perField.consumer);
+ if (nextTermsHash != null) {
+ nextChildFields.put(entry.getKey(), perField.nextPerField);
+ }
+ }
- TermsHashPerThread perThread = (TermsHashPerThread) entry.getKey();
+ consumer.flush(childFields, state);
- Collection<InvertedDocConsumerPerField> fields = entry.getValue();
+ if (nextTermsHash != null) {
+ nextTermsHash.flush(nextChildFields, state);
+ }
+ }
- Iterator<InvertedDocConsumerPerField> fieldsIt = fields.iterator();
- Collection<TermsHashConsumerPerField> childFields = new HashSet<TermsHashConsumerPerField>();
- Collection<InvertedDocConsumerPerField> nextChildFields;
+ @Override
+ InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) {
+ return new TermsHashPerField(docInverterPerField, this, nextTermsHash, fieldInfo);
+ }
- if (nextTermsHash != null)
- nextChildFields = new HashSet<InvertedDocConsumerPerField>();
- else
- nextChildFields = null;
+ @Override
+ public boolean freeRAM() {
+ return false;
+ }
- while(fieldsIt.hasNext()) {
- TermsHashPerField perField = (TermsHashPerField) fieldsIt.next();
- childFields.add(perField.consumer);
- if (nextTermsHash != null)
- nextChildFields.add(perField.nextPerField);
+ @Override
+ void finishDocument() throws IOException {
+ try {
+ consumer.finishDocument(this);
+ } finally {
+ if (nextTermsHash != null) {
+ nextTermsHash.consumer.finishDocument(nextTermsHash);
}
-
- childThreadsAndFields.put(perThread.consumer, childFields);
- if (nextTermsHash != null)
- nextThreadsAndFields.put(perThread.nextPerThread, nextChildFields);
}
-
- consumer.flush(childThreadsAndFields, state);
-
- if (nextTermsHash != null)
- nextTermsHash.flush(nextThreadsAndFields, state);
}
@Override
- synchronized public boolean freeRAM() {
- return false;
+ void startDocument() throws IOException {
+ consumer.startDocument();
+ if (nextTermsHash != null) {
+ nextTermsHash.consumer.startDocument();
+ }
}
}
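
TermsHash above loses its addThread/perThread layer: the block pools, the primary flag and the doc state now live directly on the per-DocumentsWriterPerThread TermsHash, and flush receives a Map<FieldInfo,InvertedDocConsumerPerField> instead of the old threads-and-fields map. The startDocument/finishDocument pair drives the optional nextTermsHash chain; an illustrative, non-Lucene sketch of that two-stage wiring:

import java.io.IOException;

// Placeholder types: a primary stage always runs, an optional secondary
// stage (nextTermsHash in the diff) is driven from it so both see each
// document's start and finish exactly once.
final class ConsumerChain {
  interface Stage {
    void startDocument() throws IOException;
    void finishDocument() throws IOException;
  }

  private final Stage primary;
  private final Stage secondary; // may be null

  ConsumerChain(Stage primary, Stage secondary) {
    this.primary = primary;
    this.secondary = secondary;
  }

  void startDocument() throws IOException {
    primary.startDocument();
    if (secondary != null) {
      secondary.startDocument();
    }
  }

  void finishDocument() throws IOException {
    try {
      primary.finishDocument();
    } finally {
      // Mirror the try/finally in the diff: the secondary stage still
      // finishes even if the primary stage throws.
      if (secondary != null) {
        secondary.finishDocument();
      }
    }
  }
}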
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java Sat May 14 13:51:35 2011
@@ -18,11 +18,12 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Collection;
import java.util.Map;
abstract class TermsHashConsumer {
- abstract TermsHashConsumerPerThread addThread(TermsHashPerThread perThread);
- abstract void flush(Map<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> threadsAndFields, final SegmentWriteState state) throws IOException;
+ abstract void flush(Map<FieldInfo, TermsHashConsumerPerField> fieldsToFlush, final SegmentWriteState state) throws IOException;
abstract void abort();
- }
+ abstract void startDocument() throws IOException;
+ abstract void finishDocument(TermsHash termsHash) throws IOException;
+ abstract public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo);
+}
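
The TermsHashConsumer contract above follows the same shift: addThread is gone, addField hands back a per-field consumer directly, and flush is keyed by FieldInfo. A stand-alone sketch of that shape, using String keys and Object values as placeholders for FieldInfo and TermsHashConsumerPerField, and omitting the SegmentWriteState and TermsHash parameters:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

// Placeholder contract, not Lucene's actual abstract class.
abstract class SketchConsumer {
  abstract Object addField(String fieldName);
  abstract void startDocument() throws IOException;
  abstract void finishDocument() throws IOException;
  abstract void flush(Map<String, Object> fieldsToFlush) throws IOException;
  abstract void abort();
}

// Trivial implementation that counts how many times each field was
// flushed, to show the per-field (rather than per-thread) shape of the API.
final class CountingConsumer extends SketchConsumer {
  private final Map<String, Integer> flushCounts = new HashMap<String, Integer>();

  @Override
  Object addField(String fieldName) {
    return fieldName;
  }

  @Override
  void startDocument() {}

  @Override
  void finishDocument() {}

  @Override
  void flush(Map<String, Object> fieldsToFlush) {
    for (String field : fieldsToFlush.keySet()) {
      Integer prev = flushCounts.get(field);
      flushCounts.put(field, prev == null ? 1 : prev + 1);
    }
  }

  @Override
  void abort() {
    flushCounts.clear();
  }
}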
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java Sat May 14 13:51:35 2011
@@ -34,9 +34,10 @@ final class TermsHashPerField extends In
final TermsHashConsumerPerField consumer;
+ final TermsHash termsHash;
+
final TermsHashPerField nextPerField;
- final TermsHashPerThread perThread;
- final DocumentsWriter.DocState docState;
+ final DocumentsWriterPerThread.DocState docState;
final FieldInvertState fieldState;
TermToBytesRefAttribute termAtt;
BytesRef termBytesRef;
@@ -52,27 +53,27 @@ final class TermsHashPerField extends In
final FieldInfo fieldInfo;
final BytesRefHash bytesHash;
-
+
ParallelPostingsArray postingsArray;
private final AtomicLong bytesUsed;
- public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHashPerThread perThread, final TermsHashPerThread nextPerThread, final FieldInfo fieldInfo) {
- this.perThread = perThread;
- intPool = perThread.intPool;
- bytePool = perThread.bytePool;
- termBytePool = perThread.termBytePool;
- docState = perThread.docState;
- bytesUsed = perThread.termsHash.trackAllocations?perThread.termsHash.docWriter.bytesUsed:new AtomicLong();
-
+ public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHash termsHash, final TermsHash nextTermsHash, final FieldInfo fieldInfo) {
+ intPool = termsHash.intPool;
+ bytePool = termsHash.bytePool;
+ termBytePool = termsHash.termBytePool;
+ docState = termsHash.docState;
+ this.termsHash = termsHash;
+ bytesUsed = termsHash.trackAllocations ? termsHash.docWriter.bytesUsed
+ : new AtomicLong();
fieldState = docInverterPerField.fieldState;
- this.consumer = perThread.consumer.addField(this, fieldInfo);
+ this.consumer = termsHash.consumer.addField(this, fieldInfo);
PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, bytesUsed);
- bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts);
+ bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts);
streamCount = consumer.getStreamCount();
numPostingInt = 2*streamCount;
this.fieldInfo = fieldInfo;
- if (nextPerThread != null)
- nextPerField = (TermsHashPerField) nextPerThread.addField(docInverterPerField, fieldInfo);
+ if (nextTermsHash != null)
+ nextPerField = (TermsHashPerField) nextTermsHash.addField(docInverterPerField, fieldInfo);
else
nextPerField = null;
}
@@ -80,7 +81,7 @@ final class TermsHashPerField extends In
void shrinkHash(int targetSize) {
// Fully free the bytesHash on each flush but keep the pool untouched
// bytesHash.clear will clear the ByteStartArray and in turn the ParallelPostingsArray too
- bytesHash.clear(false);
+ bytesHash.clear(false);
}
public void reset() {
@@ -90,7 +91,7 @@ final class TermsHashPerField extends In
}
@Override
- synchronized public void abort() {
+ public void abort() {
reset();
if (nextPerField != null)
nextPerField.abort();
@@ -99,14 +100,13 @@ final class TermsHashPerField extends In
public void initReader(ByteSliceReader reader, int termID, int stream) {
assert stream < streamCount;
int intStart = postingsArray.intStarts[termID];
- final int[] ints = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
- final int upto = intStart & DocumentsWriter.INT_BLOCK_MASK;
+ final int[] ints = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT];
+ final int upto = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK;
reader.init(bytePool,
postingsArray.byteStarts[termID]+stream*ByteBlockPool.FIRST_LEVEL_SIZE,
ints[upto+stream]);
}
-
/** Collapse the hash table & sort in-place. */
public int[] sortPostings(Comparator<BytesRef> termComp) {
return bytesHash.sort(termComp);
@@ -124,7 +124,7 @@ final class TermsHashPerField extends In
nextPerField.start(f);
}
}
-
+
@Override
boolean start(Fieldable[] fields, int count) throws IOException {
doCall = consumer.start(fields, count);
@@ -143,11 +143,12 @@ final class TermsHashPerField extends In
// First time we are seeing this token since we last
// flushed the hash.
// Init stream slices
- if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
+ if (numPostingInt + intPool.intUpto > DocumentsWriterPerThread.INT_BLOCK_SIZE)
intPool.nextBuffer();
- if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE)
+ if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) {
bytePool.nextBuffer();
+ }
intUptos = intPool.buffer;
intUptoStart = intPool.intUpto;
@@ -166,8 +167,8 @@ final class TermsHashPerField extends In
} else {
termID = (-termID)-1;
int intStart = postingsArray.intStarts[termID];
- intUptos = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
- intUptoStart = intStart & DocumentsWriter.INT_BLOCK_MASK;
+ intUptos = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT];
+ intUptoStart = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK;
consumer.addTerm(termID);
}
}
@@ -192,7 +193,7 @@ final class TermsHashPerField extends In
if (docState.maxTermPrefix == null) {
final int saved = termBytesRef.length;
try {
- termBytesRef.length = Math.min(30, DocumentsWriter.MAX_TERM_LENGTH_UTF8);
+ termBytesRef.length = Math.min(30, DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8);
docState.maxTermPrefix = termBytesRef.toString();
} finally {
termBytesRef.length = saved;
@@ -204,7 +205,7 @@ final class TermsHashPerField extends In
if (termID >= 0) {// New posting
bytesHash.byteStart(termID);
// Init stream slices
- if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) {
+ if (numPostingInt + intPool.intUpto > DocumentsWriterPerThread.INT_BLOCK_SIZE) {
intPool.nextBuffer();
}
@@ -229,8 +230,8 @@ final class TermsHashPerField extends In
} else {
termID = (-termID)-1;
final int intStart = postingsArray.intStarts[termID];
- intUptos = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
- intUptoStart = intStart & DocumentsWriter.INT_BLOCK_MASK;
+ intUptos = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT];
+ intUptoStart = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK;
consumer.addTerm(termID);
}
@@ -278,7 +279,7 @@ final class TermsHashPerField extends In
if (nextPerField != null)
nextPerField.finish();
}
-
+
private static final class PostingsBytesStartArray extends BytesStartArray {
private final TermsHashPerField perField;
@@ -289,10 +290,10 @@ final class TermsHashPerField extends In
this.perField = perField;
this.bytesUsed = bytesUsed;
}
-
+
@Override
public int[] init() {
- if(perField.postingsArray == null) {
+ if(perField.postingsArray == null) {
perField.postingsArray = perField.consumer.createPostingsArray(2);
bytesUsed.addAndGet(perField.postingsArray.size * perField.postingsArray.bytesPerPosting());
}
@@ -312,7 +313,7 @@ final class TermsHashPerField extends In
@Override
public int[] clear() {
if(perField.postingsArray != null) {
- bytesUsed.addAndGet(-perField.postingsArray.size * perField.postingsArray.bytesPerPosting());
+ bytesUsed.addAndGet(-(perField.postingsArray.size * perField.postingsArray.bytesPerPosting()));
perField.postingsArray = null;
}
return null;