You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/21 20:58:44 UTC
svn commit: r1534320 [8/39] - in /lucene/dev/branches/lucene4956: ./
dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/lucene/expressions/
dev-tools/idea/solr/contrib/velocity/ dev-tools/maven/
dev-tools/maven/lucene/ dev-tools/maven/lucene/expressions/ ...
Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java Mon Oct 21 18:58:24 2013
@@ -18,20 +18,18 @@ package org.apache.lucene.codecs.sep;
*/
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
+import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.PostingsWriterBase;
-import org.apache.lucene.codecs.TermStats;
+import org.apache.lucene.codecs.PushPostingsWriterBase;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@@ -39,7 +37,7 @@ import org.apache.lucene.util.IOUtils;
* to .pyl, skip data to .skp
*
* @lucene.experimental */
-public final class SepPostingsWriter extends PostingsWriterBase {
+public final class SepPostingsWriter extends PushPostingsWriterBase {
final static String CODEC = "SepPostingsWriter";
final static String DOC_EXTENSION = "doc";
@@ -64,7 +62,6 @@ public final class SepPostingsWriter ext
IndexOutput payloadOut;
IndexOutput skipOut;
- IndexOutput termsOut;
final SepSkipListWriter skipListWriter;
/** Expert: The fraction of TermDocs entries stored in skip tables,
@@ -87,19 +84,17 @@ public final class SepPostingsWriter ext
final int totalNumDocs;
- boolean storePayloads;
IndexOptions indexOptions;
- FieldInfo fieldInfo;
-
int lastPayloadLength;
int lastPosition;
long payloadStart;
int lastDocID;
int df;
- // Holds pending byte[] blob for the current terms block
- private final RAMOutputStream indexBytesWriter = new RAMOutputStream();
+ SepTermState lastState;
+ long lastPayloadFP;
+ long lastSkipFP;
public SepPostingsWriter(SegmentWriteState state, IntStreamFactory factory) throws IOException {
this(state, factory, DEFAULT_SKIP_INTERVAL);
@@ -116,9 +111,10 @@ public final class SepPostingsWriter ext
this.skipInterval = skipInterval;
this.skipMinimum = skipInterval; /* set to the same for now */
final String docFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, DOC_EXTENSION);
+
docOut = factory.createOutput(state.directory, docFileName, state.context);
docIndex = docOut.index();
-
+
if (state.fieldInfos.hasFreq()) {
final String frqFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FREQ_EXTENSION);
freqOut = factory.createOutput(state.directory, frqFileName, state.context);
@@ -134,7 +130,7 @@ public final class SepPostingsWriter ext
final String payloadFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, PAYLOAD_EXTENSION);
payloadOut = state.directory.createOutput(payloadFileName, state.context);
}
-
+
final String skipFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SKIP_EXTENSION);
skipOut = state.directory.createOutput(skipFileName, state.context);
@@ -155,8 +151,7 @@ public final class SepPostingsWriter ext
}
@Override
- public void start(IndexOutput termsOut) throws IOException {
- this.termsOut = termsOut;
+ public void init(IndexOutput termsOut) throws IOException {
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
// TODO: -- just ask skipper to "start" here
termsOut.writeInt(skipInterval); // write skipInterval
@@ -165,6 +160,11 @@ public final class SepPostingsWriter ext
}
@Override
+ public BlockTermState newTermState() {
+ return new SepTermState(); // empty state; finishTerm later fills in its indexes and file pointers
+ }
+
+ @Override
public void startTerm() throws IOException {
docIndex.mark();
//System.out.println("SEPW: startTerm docIndex=" + docIndex);
@@ -185,14 +185,31 @@ public final class SepPostingsWriter ext
// Currently, this instance is re-used across fields, so
// our parent calls setField whenever the field changes
@Override
- public void setField(FieldInfo fieldInfo) {
- this.fieldInfo = fieldInfo;
+ public int setField(FieldInfo fieldInfo) {
+ super.setField(fieldInfo);
this.indexOptions = fieldInfo.getIndexOptions();
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
throw new UnsupportedOperationException("this codec cannot index offsets");
}
skipListWriter.setIndexOptions(indexOptions);
- storePayloads = indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && fieldInfo.hasPayloads();
+ lastPayloadFP = 0;
+ lastSkipFP = 0;
+ lastState = setEmptyState();
+ return 0;
+ }
+
+ /**
+  * Builds a fresh term state for the current field: stream indexes are obtained
+  * from the outputs that the field's index options use, and the payload and skip
+  * file pointers are zero.
+  *
+  * @return a new, otherwise unpopulated {@code SepTermState}
+  */
+ private SepTermState setEmptyState() {
+   final SepTermState blank = new SepTermState();
+   final boolean hasFreqs = indexOptions != IndexOptions.DOCS_ONLY;
+   // Positions imply freqs, so this single test matches the nested check it replaces.
+   final boolean hasPositions = indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+
+   blank.docIndex = docOut.index();
+   if (hasFreqs) {
+     blank.freqIndex = freqOut.index();
+   }
+   if (hasPositions) {
+     blank.posIndex = posOut.index();
+   }
+   blank.payloadFP = 0;
+   blank.skipFP = 0;
+   return blank;
}
/** Adds a new doc in this term. If this returns null
@@ -211,7 +228,7 @@ public final class SepPostingsWriter ext
// TODO: -- awkward we have to make these two
// separate calls to skipper
//System.out.println(" buffer skip lastDocID=" + lastDocID);
- skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength);
+ skipListWriter.setSkipData(lastDocID, writePayloads, lastPayloadLength);
skipListWriter.bufferSkip(df);
}
@@ -232,7 +249,7 @@ public final class SepPostingsWriter ext
assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it)
lastPosition = position;
- if (storePayloads) {
+ if (writePayloads) {
final int payloadLength = payload == null ? 0 : payload.length;
if (payloadLength != lastPayloadLength) {
lastPayloadLength = payloadLength;
@@ -260,132 +277,86 @@ public final class SepPostingsWriter ext
lastPosition = 0;
}
- private static class PendingTerm {
- public final IntIndexOutput.Index docIndex;
- public final IntIndexOutput.Index freqIndex;
- public final IntIndexOutput.Index posIndex;
- public final long payloadFP;
- public final long skipFP;
-
- public PendingTerm(IntIndexOutput.Index docIndex, IntIndexOutput.Index freqIndex, IntIndexOutput.Index posIndex, long payloadFP, long skipFP) {
- this.docIndex = docIndex;
- this.freqIndex = freqIndex;
- this.posIndex = posIndex;
- this.payloadFP = payloadFP;
- this.skipFP = skipFP;
- }
+ private static class SepTermState extends BlockTermState { // per-term metadata: populated by finishTerm, written out by encodeTerm
+ public IntIndexOutput.Index docIndex; // index into the doc stream, copied from the writer's docIndex in finishTerm
+ public IntIndexOutput.Index freqIndex; // finishTerm sets this to null when the field is DOCS_ONLY
+ public IntIndexOutput.Index posIndex; // finishTerm sets this to null unless the field is DOCS_AND_FREQS_AND_POSITIONS
+ public long payloadFP; // finishTerm stores payloadStart here
+ public long skipFP; // skipOut file pointer of this term's skip data, or -1 when df < skipMinimum
}
- private final List<PendingTerm> pendingTerms = new ArrayList<PendingTerm>();
-
/** Called when we are done adding docs to this term */
@Override
- public void finishTerm(TermStats stats) throws IOException {
+ public void finishTerm(BlockTermState _state) throws IOException {
+ SepTermState state = (SepTermState)_state;
// TODO: -- wasteful we are counting this in two places?
- assert stats.docFreq > 0;
- assert stats.docFreq == df;
+ assert state.docFreq > 0;
+ assert state.docFreq == df;
- final IntIndexOutput.Index docIndexCopy = docOut.index();
- docIndexCopy.copyFrom(docIndex, false);
-
- final IntIndexOutput.Index freqIndexCopy;
- final IntIndexOutput.Index posIndexCopy;
+ state.docIndex = docOut.index();
+ state.docIndex.copyFrom(docIndex, false);
if (indexOptions != IndexOptions.DOCS_ONLY) {
- freqIndexCopy = freqOut.index();
- freqIndexCopy.copyFrom(freqIndex, false);
+ state.freqIndex = freqOut.index();
+ state.freqIndex.copyFrom(freqIndex, false);
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
- posIndexCopy = posOut.index();
- posIndexCopy.copyFrom(posIndex, false);
+ state.posIndex = posOut.index();
+ state.posIndex.copyFrom(posIndex, false);
} else {
- posIndexCopy = null;
+ state.posIndex = null;
}
} else {
- freqIndexCopy = null;
- posIndexCopy = null;
+ state.freqIndex = null;
+ state.posIndex = null;
}
- final long skipFP;
if (df >= skipMinimum) {
- skipFP = skipOut.getFilePointer();
+ state.skipFP = skipOut.getFilePointer();
//System.out.println(" skipFP=" + skipFP);
skipListWriter.writeSkip(skipOut);
//System.out.println(" numBytes=" + (skipOut.getFilePointer()-skipFP));
} else {
- skipFP = -1;
+ state.skipFP = -1;
}
+ state.payloadFP = payloadStart;
lastDocID = 0;
df = 0;
-
- pendingTerms.add(new PendingTerm(docIndexCopy,
- freqIndexCopy,
- posIndexCopy,
- payloadStart,
- skipFP));
}
@Override
- public void flushTermsBlock(int start, int count) throws IOException {
- //System.out.println("SEPW: flushTermsBlock: start=" + start + " count=" + count + " pendingTerms.size()=" + pendingTerms.size() + " termsOut.fp=" + termsOut.getFilePointer());
- assert indexBytesWriter.getFilePointer() == 0;
- final int absStart = pendingTerms.size() - start;
- final List<PendingTerm> slice = pendingTerms.subList(absStart, absStart+count);
-
- long lastPayloadFP = 0;
- long lastSkipFP = 0;
-
- if (count == 0) {
- termsOut.writeByte((byte) 0);
- return;
- }
-
- final PendingTerm firstTerm = slice.get(0);
- final IntIndexOutput.Index docIndexFlush = firstTerm.docIndex;
- final IntIndexOutput.Index freqIndexFlush = firstTerm.freqIndex;
- final IntIndexOutput.Index posIndexFlush = firstTerm.posIndex;
-
- for(int idx=0;idx<slice.size();idx++) {
- final boolean isFirstTerm = idx == 0;
- final PendingTerm t = slice.get(idx);
- //System.out.println(" write idx=" + idx + " docIndex=" + t.docIndex);
- docIndexFlush.copyFrom(t.docIndex, false);
- docIndexFlush.write(indexBytesWriter, isFirstTerm);
- if (indexOptions != IndexOptions.DOCS_ONLY) {
- freqIndexFlush.copyFrom(t.freqIndex, false);
- freqIndexFlush.write(indexBytesWriter, isFirstTerm);
- //System.out.println(" freqIndex=" + t.freqIndex);
- if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
- posIndexFlush.copyFrom(t.posIndex, false);
- posIndexFlush.write(indexBytesWriter, isFirstTerm);
- //System.out.println(" posIndex=" + t.posIndex);
- if (storePayloads) {
- //System.out.println(" payloadFP=" + t.payloadFP);
- if (isFirstTerm) {
- indexBytesWriter.writeVLong(t.payloadFP);
- } else {
- indexBytesWriter.writeVLong(t.payloadFP - lastPayloadFP);
- }
- lastPayloadFP = t.payloadFP;
+ public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
+ SepTermState state = (SepTermState)_state;
+ if (absolute) {
+ lastSkipFP = 0;
+ lastPayloadFP = 0;
+ lastState = state;
+ }
+ lastState.docIndex.copyFrom(state.docIndex, false);
+ lastState.docIndex.write(out, absolute);
+ if (indexOptions != IndexOptions.DOCS_ONLY) {
+ lastState.freqIndex.copyFrom(state.freqIndex, false);
+ lastState.freqIndex.write(out, absolute);
+ if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ lastState.posIndex.copyFrom(state.posIndex, false);
+ lastState.posIndex.write(out, absolute);
+ if (writePayloads) {
+ if (absolute) {
+ out.writeVLong(state.payloadFP);
+ } else {
+ out.writeVLong(state.payloadFP - lastPayloadFP);
}
+ lastPayloadFP = state.payloadFP;
}
}
-
- if (t.skipFP != -1) {
- if (isFirstTerm) {
- indexBytesWriter.writeVLong(t.skipFP);
- } else {
- indexBytesWriter.writeVLong(t.skipFP - lastSkipFP);
- }
- lastSkipFP = t.skipFP;
- //System.out.println(" skipFP=" + t.skipFP);
+ }
+ if (state.skipFP != -1) {
+ if (absolute) {
+ out.writeVLong(state.skipFP);
+ } else {
+ out.writeVLong(state.skipFP - lastSkipFP);
}
+ lastSkipFP = state.skipFP;
}
-
- //System.out.println(" numBytes=" + indexBytesWriter.getFilePointer());
- termsOut.writeVLong((int) indexBytesWriter.getFilePointer());
- indexBytesWriter.writeTo(termsOut);
- indexBytesWriter.reset();
- slice.clear();
}
@Override
Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java Mon Oct 21 18:58:24 2013
@@ -38,12 +38,16 @@ import org.apache.lucene.index.SegmentWr
* minvalue 0
* pattern 000
* 005
+ * T
* 234
+ * T
* 123
+ * T
* ...
* </pre>
* so a document's value (delta encoded from minvalue) can be retrieved by
- * seeking to startOffset + (1+pattern.length())*docid. The extra 1 is the newline.
+ * seeking to startOffset + (1+pattern.length()+2)*docid. The extra 1 is the newline after the value.
+ * The extra 2 is the 'T' or 'F' flag line plus its newline: 'T' if the document has a value, 'F' if it is missing.
*
* for bytes this is also a "fixed-width" file, for example:
* <pre>
@@ -53,12 +57,15 @@ import org.apache.lucene.index.SegmentWr
* pattern 0
* length 6
* foobar[space][space]
+ * T
* length 3
* baz[space][space][space][space][space]
+ * T
* ...
* </pre>
- * so a doc's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*doc
+ * so a doc's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength+2)*doc
* the extra 9 is 2 newlines, plus "length " itself.
+ * the extra 2 is the 'T' or 'F' flag line plus its newline: 'T' if the document has a value, 'F' if it is missing.
*
* for sorted bytes this is a fixed-width file, for example:
* <pre>
Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java Mon Oct 21 18:58:24 2013
@@ -17,6 +17,16 @@ package org.apache.lucene.codecs.simplet
* limitations under the License.
*/
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXLENGTH;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MINVALUE;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.NUMVALUES;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORDPATTERN;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.PATTERN;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.TYPE;
+
import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
@@ -31,26 +41,17 @@ import org.apache.lucene.codecs.DocValue
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
-import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END;
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD;
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH;
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXLENGTH;
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MINVALUE;
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.NUMVALUES;
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORDPATTERN;
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.PATTERN;
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.TYPE;
-
class SimpleTextDocValuesReader extends DocValuesProducer {
static class OneField {
@@ -61,7 +62,7 @@ class SimpleTextDocValuesReader extends
boolean fixedLength;
long minValue;
long numValues;
- };
+ }
final int maxDoc;
final IndexInput data;
@@ -69,7 +70,7 @@ class SimpleTextDocValuesReader extends
final Map<String,OneField> fields = new HashMap<String,OneField>();
public SimpleTextDocValuesReader(SegmentReadState state, String ext) throws IOException {
- //System.out.println("dir=" + state.directory + " seg=" + state.segmentInfo.name + " ext=" + ext);
+ // System.out.println("dir=" + state.directory + " seg=" + state.segmentInfo.name + " file=" + IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext));
data = state.directory.openInput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext), state.context);
maxDoc = state.segmentInfo.getDocCount();
while(true) {
@@ -81,8 +82,6 @@ class SimpleTextDocValuesReader extends
assert startsWith(FIELD) : scratch.utf8ToString();
String fieldName = stripPrefix(FIELD);
//System.out.println(" field=" + fieldName);
- FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldName);
- assert fieldInfo != null;
OneField field = new OneField();
fields.put(fieldName, field);
@@ -100,7 +99,7 @@ class SimpleTextDocValuesReader extends
assert startsWith(PATTERN);
field.pattern = stripPrefix(PATTERN);
field.dataStartFilePointer = data.getFilePointer();
- data.seek(data.getFilePointer() + (1+field.pattern.length()) * maxDoc);
+ data.seek(data.getFilePointer() + (1+field.pattern.length()+2) * maxDoc);
} else if (dvType == DocValuesType.BINARY) {
readLine();
assert startsWith(MAXLENGTH);
@@ -109,7 +108,7 @@ class SimpleTextDocValuesReader extends
assert startsWith(PATTERN);
field.pattern = stripPrefix(PATTERN);
field.dataStartFilePointer = data.getFilePointer();
- data.seek(data.getFilePointer() + (9+field.pattern.length()+field.maxLength) * maxDoc);
+ data.seek(data.getFilePointer() + (9+field.pattern.length()+field.maxLength+2) * maxDoc);
} else if (dvType == DocValuesType.SORTED || dvType == DocValuesType.SORTED_SET) {
readLine();
assert startsWith(NUMVALUES);
@@ -158,7 +157,7 @@ class SimpleTextDocValuesReader extends
if (docID < 0 || docID >= maxDoc) {
throw new IndexOutOfBoundsException("docID must be 0 .. " + (maxDoc-1) + "; got " + docID);
}
- in.seek(field.dataStartFilePointer + (1+field.pattern.length())*docID);
+ in.seek(field.dataStartFilePointer + (1+field.pattern.length()+2)*docID);
SimpleTextUtil.readLine(in, scratch);
//System.out.println("parsing delta: " + scratch.utf8ToString());
BigDecimal bd;
@@ -169,6 +168,7 @@ class SimpleTextDocValuesReader extends
e.initCause(pe);
throw e;
}
+ SimpleTextUtil.readLine(in, scratch); // read the line telling us if it's real or not
return BigInteger.valueOf(field.minValue).add(bd.toBigIntegerExact()).longValue();
} catch (IOException ioe) {
throw new RuntimeException(ioe);
@@ -176,6 +176,30 @@ class SimpleTextDocValuesReader extends
}
};
}
+
+ /**
+  * Returns a {@link Bits} that reports, per document, whether a numeric value was
+  * actually written for the field.
+  *
+  * <p>Each lookup seeks to the document's fixed-width record, reads past the value
+  * line and then reads the flag line; the result is true when that line starts
+  * with 'T'.
+  *
+  * @param fieldInfo field whose entry in {@code fields} is consulted
+  * @return bits whose {@code length()} is {@code maxDoc}
+  * @throws IOException declared for callers; read failures inside {@code get} are
+  *         rethrown as {@link RuntimeException}
+  */
+ private Bits getNumericDocsWithField(FieldInfo fieldInfo) throws IOException {
+   final OneField field = fields.get(fieldInfo.name);
+   final IndexInput in = data.clone();
+   final BytesRef scratch = new BytesRef();
+   // Bytes per record: value + newline + flag + newline. Held as a long so that
+   // multiplying by the doc index cannot overflow int on large segments.
+   final long bytesPerDoc = 1L + field.pattern.length() + 2L;
+   return new Bits() {
+     @Override
+     public boolean get(int index) {
+       try {
+         in.seek(field.dataStartFilePointer + bytesPerDoc * index);
+         SimpleTextUtil.readLine(in, scratch); // data
+         SimpleTextUtil.readLine(in, scratch); // 'T' or 'F'
+         return scratch.bytes[scratch.offset] == (byte) 'T';
+       } catch (IOException e) {
+         throw new RuntimeException(e);
+       }
+     }
+
+     @Override
+     public int length() {
+       return maxDoc;
+     }
+   };
+ }
@Override
public BinaryDocValues getBinary(FieldInfo fieldInfo) throws IOException {
@@ -196,7 +220,7 @@ class SimpleTextDocValuesReader extends
if (docID < 0 || docID >= maxDoc) {
throw new IndexOutOfBoundsException("docID must be 0 .. " + (maxDoc-1) + "; got " + docID);
}
- in.seek(field.dataStartFilePointer + (9+field.pattern.length() + field.maxLength)*docID);
+ in.seek(field.dataStartFilePointer + (9+field.pattern.length() + field.maxLength+2)*docID);
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, LENGTH);
int len;
@@ -217,6 +241,45 @@ class SimpleTextDocValuesReader extends
}
};
}
+
+ /**
+  * Returns a {@link Bits} that reports, per document, whether a binary value was
+  * actually written for the field.
+  *
+  * <p>Each lookup seeks to the document's fixed-width record, parses the length
+  * line, skips the value bytes and the rest of that line, and then reads the flag
+  * line; the result is true when that line starts with 'T'.
+  *
+  * @param fieldInfo field whose entry in {@code fields} is consulted
+  * @return bits whose {@code length()} is {@code maxDoc}
+  * @throws IOException declared for callers; read failures inside {@code get}
+  *         (including an unparseable length) are rethrown as {@link RuntimeException}
+  */
+ private Bits getBinaryDocsWithField(FieldInfo fieldInfo) throws IOException {
+   final OneField field = fields.get(fieldInfo.name);
+   final IndexInput in = data.clone();
+   final BytesRef scratch = new BytesRef();
+   final DecimalFormat decoder = new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));
+   // Bytes per record, held as a long so that multiplying by the doc index
+   // cannot overflow int on large segments.
+   final long bytesPerDoc = 9L + field.pattern.length() + field.maxLength + 2L;
+
+   return new Bits() {
+     @Override
+     public boolean get(int index) {
+       try {
+         in.seek(field.dataStartFilePointer + bytesPerDoc * index);
+         SimpleTextUtil.readLine(in, scratch);
+         assert StringHelper.startsWith(scratch, LENGTH);
+         int len;
+         try {
+           len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, "UTF-8")).intValue();
+         } catch (ParseException pe) {
+           CorruptIndexException e = new CorruptIndexException("failed to parse int length (resource=" + in + ")");
+           e.initCause(pe);
+           throw e;
+         }
+         // skip past the value bytes without copying them into a temporary buffer
+         in.seek(in.getFilePointer() + len);
+         SimpleTextUtil.readLine(in, scratch); // padding and newline
+         SimpleTextUtil.readLine(in, scratch); // 'T' or 'F'
+         return scratch.bytes[scratch.offset] == (byte) 'T';
+       } catch (IOException ioe) {
+         throw new RuntimeException(ioe);
+       }
+     }
+
+     @Override
+     public int length() {
+       return maxDoc;
+     }
+   };
+ }
@Override
public SortedDocValues getSorted(FieldInfo fieldInfo) throws IOException {
@@ -241,7 +304,7 @@ class SimpleTextDocValuesReader extends
in.seek(field.dataStartFilePointer + field.numValues * (9 + field.pattern.length() + field.maxLength) + docID * (1 + field.ordPattern.length()));
SimpleTextUtil.readLine(in, scratch);
try {
- return ordDecoder.parse(scratch.utf8ToString()).intValue();
+ return (int) ordDecoder.parse(scratch.utf8ToString()).longValue()-1;
} catch (ParseException pe) {
CorruptIndexException e = new CorruptIndexException("failed to parse ord (resource=" + in + ")");
e.initCause(pe);
@@ -362,6 +425,22 @@ class SimpleTextDocValuesReader extends
}
};
}
+
+ /**
+  * Returns the docs-with-field bits for {@code field}, choosing the implementation
+  * by the field's doc values type: numeric and binary fields use the per-record
+  * flag readers, sorted and sorted-set fields wrap their doc values.
+  *
+  * @throws AssertionError if the doc values type is none of the four handled types
+  */
+ @Override
+ public Bits getDocsWithField(FieldInfo field) throws IOException {
+   final DocValuesType type = field.getDocValuesType();
+   switch (type) {
+     case NUMERIC:
+       return getNumericDocsWithField(field);
+     case BINARY:
+       return getBinaryDocsWithField(field);
+     case SORTED:
+       return new SortedDocsWithField(getSorted(field), maxDoc);
+     case SORTED_SET:
+       return new SortedSetDocsWithField(getSortedSet(field), maxDoc);
+     default:
+       throw new AssertionError();
+   }
+ }
@Override
public void close() throws IOException {
@@ -383,4 +462,9 @@ class SimpleTextDocValuesReader extends
private String stripPrefix(BytesRef prefix) throws IOException {
return new String(scratch.bytes, scratch.offset + prefix.length, scratch.length - prefix.length, "UTF-8");
}
+
+ @Override
+ public long ramBytesUsed() {
+ return 0; // this reader always reports zero bytes of RAM usage
+ }
}
Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java Mon Oct 21 18:58:24 2013
@@ -55,7 +55,7 @@ class SimpleTextDocValuesWriter extends
private final Set<String> fieldsSeen = new HashSet<String>(); // for asserting
public SimpleTextDocValuesWriter(SegmentWriteState state, String ext) throws IOException {
- //System.out.println("WRITE: " + IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext) + " " + state.segmentInfo.getDocCount() + " docs");
+ // System.out.println("WRITE: " + IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext) + " " + state.segmentInfo.getDocCount() + " docs");
data = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext), state.context);
numDocs = state.segmentInfo.getDocCount();
}
@@ -78,7 +78,7 @@ class SimpleTextDocValuesWriter extends
long minValue = Long.MAX_VALUE;
long maxValue = Long.MIN_VALUE;
for(Number n : values) {
- long v = n.longValue();
+ long v = n == null ? 0 : n.longValue();
minValue = Math.min(minValue, v);
maxValue = Math.max(maxValue, v);
}
@@ -112,13 +112,19 @@ class SimpleTextDocValuesWriter extends
// second pass to write the values
for(Number n : values) {
- long value = n.longValue();
+ long value = n == null ? 0 : n.longValue();
assert value >= minValue;
Number delta = BigInteger.valueOf(value).subtract(BigInteger.valueOf(minValue));
String s = encoder.format(delta);
assert s.length() == patternString.length();
SimpleTextUtil.write(data, s, scratch);
SimpleTextUtil.writeNewline(data);
+ if (n == null) {
+ SimpleTextUtil.write(data, "F", scratch);
+ } else {
+ SimpleTextUtil.write(data, "T", scratch);
+ }
+ SimpleTextUtil.writeNewline(data);
numDocsWritten++;
assert numDocsWritten <= numDocs;
}
@@ -132,7 +138,8 @@ class SimpleTextDocValuesWriter extends
assert field.getDocValuesType() == DocValuesType.BINARY;
int maxLength = 0;
for(BytesRef value : values) {
- maxLength = Math.max(maxLength, value.length);
+ final int length = value == null ? 0 : value.length;
+ maxLength = Math.max(maxLength, length);
}
writeFieldEntry(field, FieldInfo.DocValuesType.BINARY);
@@ -155,19 +162,28 @@ class SimpleTextDocValuesWriter extends
int numDocsWritten = 0;
for(BytesRef value : values) {
// write length
+ final int length = value == null ? 0 : value.length;
SimpleTextUtil.write(data, LENGTH);
- SimpleTextUtil.write(data, encoder.format(value.length), scratch);
+ SimpleTextUtil.write(data, encoder.format(length), scratch);
SimpleTextUtil.writeNewline(data);
// write bytes -- don't use SimpleText.write
// because it escapes:
- data.writeBytes(value.bytes, value.offset, value.length);
+ if (value != null) {
+ data.writeBytes(value.bytes, value.offset, value.length);
+ }
// pad to fit
- for (int i = value.length; i < maxLength; i++) {
+ for (int i = length; i < maxLength; i++) {
data.writeByte((byte)' ');
}
SimpleTextUtil.writeNewline(data);
+ if (value == null) {
+ SimpleTextUtil.write(data, "F", scratch);
+ } else {
+ SimpleTextUtil.write(data, "T", scratch);
+ }
+ SimpleTextUtil.writeNewline(data);
numDocsWritten++;
}
@@ -209,7 +225,7 @@ class SimpleTextDocValuesWriter extends
SimpleTextUtil.writeNewline(data);
final DecimalFormat encoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));
- int maxOrdBytes = Integer.toString(valueCount).length();
+ int maxOrdBytes = Long.toString(valueCount+1L).length();
sb.setLength(0);
for (int i = 0; i < maxOrdBytes; i++) {
sb.append('0');
@@ -246,7 +262,7 @@ class SimpleTextDocValuesWriter extends
assert valuesSeen == valueCount;
for(Number ord : docToOrd) {
- SimpleTextUtil.write(data, ordEncoder.format(ord.intValue()), scratch);
+ SimpleTextUtil.write(data, ordEncoder.format(ord.longValue()+1), scratch);
SimpleTextUtil.writeNewline(data);
}
}
Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java Mon Oct 21 18:58:24 2013
@@ -47,8 +47,8 @@ import static org.apache.lucene.codecs.s
public class SimpleTextFieldInfosReader extends FieldInfosReader {
@Override
- public FieldInfos read(Directory directory, String segmentName, IOContext iocontext) throws IOException {
- final String fileName = IndexFileNames.segmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
+ public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext iocontext) throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION);
IndexInput input = directory.openInput(fileName, iocontext);
BytesRef scratch = new BytesRef();
@@ -105,6 +105,10 @@ public class SimpleTextFieldInfosReader
final DocValuesType docValuesType = docValuesType(dvType);
SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch, DOCVALUES_GEN);
+ final long dvGen = Long.parseLong(readString(DOCVALUES_GEN.length, scratch));
+
+ SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, NUM_ATTS);
int numAtts = Integer.parseInt(readString(NUM_ATTS.length, scratch));
Map<String,String> atts = new HashMap<String,String>();
@@ -122,6 +126,7 @@ public class SimpleTextFieldInfosReader
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(atts));
+ infos[i].setDocValuesGen(dvGen);
}
if (input.getFilePointer() != input.length()) {
Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java Mon Oct 21 18:58:24 2013
@@ -53,14 +53,15 @@ public class SimpleTextFieldInfosWriter
static final BytesRef NORMS = new BytesRef(" norms ");
static final BytesRef NORMS_TYPE = new BytesRef(" norms type ");
static final BytesRef DOCVALUES = new BytesRef(" doc values ");
+ static final BytesRef DOCVALUES_GEN = new BytesRef(" doc values gen ");
static final BytesRef INDEXOPTIONS = new BytesRef(" index options ");
static final BytesRef NUM_ATTS = new BytesRef(" attributes ");
final static BytesRef ATT_KEY = new BytesRef(" key ");
final static BytesRef ATT_VALUE = new BytesRef(" value ");
@Override
- public void write(Directory directory, String segmentName, FieldInfos infos, IOContext context) throws IOException {
- final String fileName = IndexFileNames.segmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
+ public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION);
IndexOutput out = directory.createOutput(fileName, context);
BytesRef scratch = new BytesRef();
boolean success = false;
@@ -108,6 +109,10 @@ public class SimpleTextFieldInfosWriter
SimpleTextUtil.write(out, DOCVALUES);
SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch);
SimpleTextUtil.writeNewline(out);
+
+ SimpleTextUtil.write(out, DOCVALUES_GEN);
+ SimpleTextUtil.write(out, Long.toString(fi.getDocValuesGen()), scratch);
+ SimpleTextUtil.writeNewline(out);
Map<String,String> atts = fi.attributes();
int numAtts = atts == null ? 0 : atts.size();
Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java Mon Oct 21 18:58:24 2013
@@ -19,7 +19,6 @@ package org.apache.lucene.codecs.simplet
import java.io.IOException;
import java.util.Collections;
-import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
@@ -218,11 +217,6 @@ class SimpleTextFieldsReader extends Fie
}
return docsAndPositionsEnum.reset(docsStart, liveDocs, indexOptions, docFreq);
}
-
- @Override
- public Comparator<BytesRef> getComparator() {
- return BytesRef.getUTF8SortedAsUnicodeComparator();
- }
}
private class SimpleTextDocsEnum extends DocsEnum {
@@ -574,6 +568,11 @@ class SimpleTextFieldsReader extends Fie
*/
//System.out.println("FST " + fst.sizeInBytes());
}
+
+ /** Returns approximate RAM bytes used */
+ public long ramBytesUsed() {
+ return (fst!=null) ? fst.sizeInBytes() : 0;
+ }
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
@@ -585,11 +584,6 @@ class SimpleTextFieldsReader extends Fie
}
@Override
- public Comparator<BytesRef> getComparator() {
- return BytesRef.getUTF8SortedAsUnicodeComparator();
- }
-
- @Override
public long size() {
return (long) termCount;
}
@@ -610,6 +604,11 @@ class SimpleTextFieldsReader extends Fie
}
@Override
+ public boolean hasFreqs() {
+ return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+ }
+
+ @Override
public boolean hasOffsets() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
}
@@ -630,7 +629,7 @@ class SimpleTextFieldsReader extends Fie
return Collections.unmodifiableSet(fields.keySet()).iterator();
}
- private final Map<String,Terms> termsCache = new HashMap<String,Terms>();
+ private final Map<String,SimpleTextTerms> termsCache = new HashMap<String,SimpleTextTerms>();
@Override
synchronized public Terms terms(String field) throws IOException {
@@ -641,7 +640,7 @@ class SimpleTextFieldsReader extends Fie
return null;
} else {
terms = new SimpleTextTerms(field, fp);
- termsCache.put(field, terms);
+ termsCache.put(field, (SimpleTextTerms) terms);
}
}
return terms;
@@ -656,4 +655,13 @@ class SimpleTextFieldsReader extends Fie
public void close() throws IOException {
in.close();
}
+
+ @Override
+ public long ramBytesUsed() {
+ long sizeInBytes = 0;
+ for(SimpleTextTerms simpleTextTerms : termsCache.values()) {
+ sizeInBytes += (simpleTextTerms!=null) ? simpleTextTerms.ramBytesUsed() : 0;
+ }
+ return sizeInBytes;
+ }
}
Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java Mon Oct 21 18:58:24 2013
@@ -17,23 +17,27 @@ package org.apache.lucene.codecs.simplet
* limitations under the License.
*/
-import org.apache.lucene.util.BytesRef;
+import java.io.Closeable;
+import java.io.IOException;
+
import org.apache.lucene.codecs.FieldsConsumer;
-import org.apache.lucene.codecs.PostingsConsumer;
-import org.apache.lucene.codecs.TermStats;
-import org.apache.lucene.codecs.TermsConsumer;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.Fields;
import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
-import java.io.IOException;
-import java.util.Comparator;
-
-class SimpleTextFieldsWriter extends FieldsConsumer {
+class SimpleTextFieldsWriter extends FieldsConsumer implements Closeable {
private final IndexOutput out;
private final BytesRef scratch = new BytesRef(10);
+ private final SegmentWriteState writeState;
final static BytesRef END = new BytesRef("END");
final static BytesRef FIELD = new BytesRef("field ");
@@ -45,134 +49,168 @@ class SimpleTextFieldsWriter extends Fie
final static BytesRef END_OFFSET = new BytesRef(" endOffset ");
final static BytesRef PAYLOAD = new BytesRef(" payload ");
- public SimpleTextFieldsWriter(SegmentWriteState state) throws IOException {
- final String fileName = SimpleTextPostingsFormat.getPostingsFileName(state.segmentInfo.name, state.segmentSuffix);
- out = state.directory.createOutput(fileName, state.context);
- }
-
- private void write(String s) throws IOException {
- SimpleTextUtil.write(out, s, scratch);
- }
-
- private void write(BytesRef b) throws IOException {
- SimpleTextUtil.write(out, b);
- }
-
- private void newline() throws IOException {
- SimpleTextUtil.writeNewline(out);
+ public SimpleTextFieldsWriter(SegmentWriteState writeState) throws IOException {
+ final String fileName = SimpleTextPostingsFormat.getPostingsFileName(writeState.segmentInfo.name, writeState.segmentSuffix);
+ out = writeState.directory.createOutput(fileName, writeState.context);
+ this.writeState = writeState;
}
@Override
- public TermsConsumer addField(FieldInfo field) throws IOException {
- write(FIELD);
- write(field.name);
- newline();
- return new SimpleTextTermsWriter(field);
- }
-
- private class SimpleTextTermsWriter extends TermsConsumer {
- private final SimpleTextPostingsWriter postingsWriter;
-
- public SimpleTextTermsWriter(FieldInfo field) {
- postingsWriter = new SimpleTextPostingsWriter(field);
- }
-
- @Override
- public PostingsConsumer startTerm(BytesRef term) throws IOException {
- return postingsWriter.reset(term);
- }
-
- @Override
- public void finishTerm(BytesRef term, TermStats stats) throws IOException {
- }
-
- @Override
- public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
- }
-
- @Override
- public Comparator<BytesRef> getComparator() {
- return BytesRef.getUTF8SortedAsUnicodeComparator();
+ public void write(Fields fields) throws IOException {
+ boolean success = false;
+ try {
+ write(writeState.fieldInfos, fields);
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(this);
+ } else {
+ IOUtils.closeWhileHandlingException(this);
+ }
}
}
- private class SimpleTextPostingsWriter extends PostingsConsumer {
- private BytesRef term;
- private boolean wroteTerm;
- private final IndexOptions indexOptions;
- private final boolean writePositions;
- private final boolean writeOffsets;
-
- // for assert:
- private int lastStartOffset = 0;
-
- public SimpleTextPostingsWriter(FieldInfo field) {
- this.indexOptions = field.getIndexOptions();
- writePositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
- writeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
- //System.out.println("writeOffsets=" + writeOffsets);
- //System.out.println("writePos=" + writePositions);
- }
+ public void write(FieldInfos fieldInfos, Fields fields) throws IOException {
- @Override
- public void startDoc(int docID, int termDocFreq) throws IOException {
- if (!wroteTerm) {
- // we lazily do this, in case the term had zero docs
- write(TERM);
- write(term);
- newline();
- wroteTerm = true;
+ // for each field
+ for(String field : fields) {
+ Terms terms = fields.terms(field);
+ if (terms == null) {
+ // Annoyingly, this can happen!
+ continue;
}
+ FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
- write(DOC);
- write(Integer.toString(docID));
- newline();
- if (indexOptions != IndexOptions.DOCS_ONLY) {
- write(FREQ);
- write(Integer.toString(termDocFreq));
- newline();
- }
-
- lastStartOffset = 0;
- }
-
- public PostingsConsumer reset(BytesRef term) {
- this.term = term;
- wroteTerm = false;
- return this;
- }
-
- @Override
- public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
- if (writePositions) {
- write(POS);
- write(Integer.toString(position));
- newline();
- }
+ boolean wroteField = false;
- if (writeOffsets) {
- assert endOffset >= startOffset;
- assert startOffset >= lastStartOffset: "startOffset=" + startOffset + " lastStartOffset=" + lastStartOffset;
- lastStartOffset = startOffset;
- write(START_OFFSET);
- write(Integer.toString(startOffset));
- newline();
- write(END_OFFSET);
- write(Integer.toString(endOffset));
- newline();
+ boolean hasPositions = terms.hasPositions();
+ boolean hasFreqs = terms.hasFreqs();
+ boolean hasPayloads = fieldInfo.hasPayloads();
+ boolean hasOffsets = terms.hasOffsets();
+
+ int flags = 0;
+ if (hasPositions) {
+
+ if (hasPayloads) {
+ flags = flags | DocsAndPositionsEnum.FLAG_PAYLOADS;
+ }
+ if (hasOffsets) {
+ flags = flags | DocsAndPositionsEnum.FLAG_OFFSETS;
+ }
+ } else {
+ if (hasFreqs) {
+ flags = flags | DocsEnum.FLAG_FREQS;
+ }
}
- if (payload != null && payload.length > 0) {
- assert payload.length != 0;
- write(PAYLOAD);
- write(payload);
- newline();
+ TermsEnum termsEnum = terms.iterator(null);
+ DocsAndPositionsEnum posEnum = null;
+ DocsEnum docsEnum = null;
+
+ // for each term in field
+ while(true) {
+ BytesRef term = termsEnum.next();
+ if (term == null) {
+ break;
+ }
+
+ if (hasPositions) {
+ posEnum = termsEnum.docsAndPositions(null, posEnum, flags);
+ docsEnum = posEnum;
+ } else {
+ docsEnum = termsEnum.docs(null, docsEnum, flags);
+ }
+ assert docsEnum != null: "termsEnum=" + termsEnum + " hasPos=" + hasPositions + " flags=" + flags;
+
+ boolean wroteTerm = false;
+
+ // for each doc in field+term
+ while(true) {
+ int doc = docsEnum.nextDoc();
+ if (doc == DocsEnum.NO_MORE_DOCS) {
+ break;
+ }
+
+ if (!wroteTerm) {
+
+ if (!wroteField) {
+ // we lazily do this, in case the field had
+ // no terms
+ write(FIELD);
+ write(field);
+ newline();
+ wroteField = true;
+ }
+
+ // we lazily do this, in case the term had
+ // zero docs
+ write(TERM);
+ write(term);
+ newline();
+ wroteTerm = true;
+ }
+
+ write(DOC);
+ write(Integer.toString(doc));
+ newline();
+ if (hasFreqs) {
+ int freq = docsEnum.freq();
+ write(FREQ);
+ write(Integer.toString(freq));
+ newline();
+
+ if (hasPositions) {
+ // for assert:
+ int lastStartOffset = 0;
+
+ // for each pos in field+term+doc
+ for(int i=0;i<freq;i++) {
+ int position = posEnum.nextPosition();
+
+ write(POS);
+ write(Integer.toString(position));
+ newline();
+
+ if (hasOffsets) {
+ int startOffset = posEnum.startOffset();
+ int endOffset = posEnum.endOffset();
+ assert endOffset >= startOffset;
+ assert startOffset >= lastStartOffset: "startOffset=" + startOffset + " lastStartOffset=" + lastStartOffset;
+ lastStartOffset = startOffset;
+ write(START_OFFSET);
+ write(Integer.toString(startOffset));
+ newline();
+ write(END_OFFSET);
+ write(Integer.toString(endOffset));
+ newline();
+ }
+
+ BytesRef payload = posEnum.getPayload();
+
+ if (payload != null && payload.length > 0) {
+ assert payload.length != 0;
+ write(PAYLOAD);
+ write(payload);
+ newline();
+ }
+ }
+ }
+ }
+ }
}
}
+ }
- @Override
- public void finishDoc() {
- }
+ private void write(String s) throws IOException {
+ SimpleTextUtil.write(out, s, scratch);
+ }
+
+ private void write(BytesRef b) throws IOException {
+ SimpleTextUtil.write(out, b);
+ }
+
+ private void newline() throws IOException {
+ SimpleTextUtil.writeNewline(out);
}
@Override
Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPostingsFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPostingsFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPostingsFormat.java Mon Oct 21 18:58:24 2013
@@ -22,9 +22,9 @@ import java.io.IOException;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
/** For debugging, curiosity, transparency only!! Do not
* use this codec in production.
Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java Mon Oct 21 18:58:24 2013
@@ -17,8 +17,16 @@ package org.apache.lucene.codecs.simplet
* limitations under the License.
*/
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_FILE;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_DIAG;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_FILES;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_USECOMPOUND;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_VERSION;
+
import java.io.IOException;
-import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
@@ -34,8 +42,6 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.*;
-
/**
* reads plaintext segments files
* <p>
@@ -80,22 +86,6 @@ public class SimpleTextSegmentInfoReader
}
SimpleTextUtil.readLine(input, scratch);
- assert StringHelper.startsWith(scratch, SI_NUM_ATTS);
- int numAtts = Integer.parseInt(readString(SI_NUM_ATTS.length, scratch));
- Map<String,String> attributes = new HashMap<String,String>();
-
- for (int i = 0; i < numAtts; i++) {
- SimpleTextUtil.readLine(input, scratch);
- assert StringHelper.startsWith(scratch, SI_ATT_KEY);
- String key = readString(SI_ATT_KEY.length, scratch);
-
- SimpleTextUtil.readLine(input, scratch);
- assert StringHelper.startsWith(scratch, SI_ATT_VALUE);
- String value = readString(SI_ATT_VALUE.length, scratch);
- attributes.put(key, value);
- }
-
- SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, SI_NUM_FILES);
int numFiles = Integer.parseInt(readString(SI_NUM_FILES.length, scratch));
Set<String> files = new HashSet<String>();
@@ -108,7 +98,7 @@ public class SimpleTextSegmentInfoReader
}
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
- isCompoundFile, null, diagnostics, Collections.unmodifiableMap(attributes));
+ isCompoundFile, null, diagnostics);
info.setFiles(files);
success = true;
return info;
Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java Mon Oct 21 18:58:24 2013
@@ -45,9 +45,6 @@ public class SimpleTextSegmentInfoWriter
final static BytesRef SI_NUM_DIAG = new BytesRef(" diagnostics ");
final static BytesRef SI_DIAG_KEY = new BytesRef(" key ");
final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
- final static BytesRef SI_NUM_ATTS = new BytesRef(" attributes ");
- final static BytesRef SI_ATT_KEY = new BytesRef(" key ");
- final static BytesRef SI_ATT_VALUE = new BytesRef(" value ");
final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
final static BytesRef SI_FILE = new BytesRef(" file ");
@@ -93,24 +90,6 @@ public class SimpleTextSegmentInfoWriter
}
}
- Map<String,String> atts = si.attributes();
- int numAtts = atts == null ? 0 : atts.size();
- SimpleTextUtil.write(output, SI_NUM_ATTS);
- SimpleTextUtil.write(output, Integer.toString(numAtts), scratch);
- SimpleTextUtil.writeNewline(output);
-
- if (numAtts > 0) {
- for (Map.Entry<String,String> entry : atts.entrySet()) {
- SimpleTextUtil.write(output, SI_ATT_KEY);
- SimpleTextUtil.write(output, entry.getKey(), scratch);
- SimpleTextUtil.writeNewline(output);
-
- SimpleTextUtil.write(output, SI_ATT_VALUE);
- SimpleTextUtil.write(output, entry.getValue(), scratch);
- SimpleTextUtil.writeNewline(output);
- }
- }
-
Set<String> files = si.files();
int numFiles = files == null ? 0 : files.size();
SimpleTextUtil.write(output, SI_NUM_FILES);
Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java Mon Oct 21 18:58:24 2013
@@ -192,4 +192,9 @@ public class SimpleTextStoredFieldsReade
return a.length == b.length - bOffset &&
ArrayUtil.equals(a.bytes, a.offset, b.bytes, b.offset + bOffset, b.length - bOffset);
}
+
+ @Override
+ public long ramBytesUsed() {
+ return 0;
+ }
}
Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java Mon Oct 21 18:58:24 2013
@@ -19,7 +19,6 @@ package org.apache.lucene.codecs.simplet
import java.io.IOException;
import java.util.Collections;
-import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
@@ -272,11 +271,6 @@ public class SimpleTextTermVectorsReader
}
@Override
- public Comparator<BytesRef> getComparator() {
- return BytesRef.getUTF8SortedAsUnicodeComparator();
- }
-
- @Override
public long size() throws IOException {
return terms.size();
}
@@ -297,6 +291,11 @@ public class SimpleTextTermVectorsReader
}
@Override
+ public boolean hasFreqs() {
+ return true;
+ }
+
+ @Override
public boolean hasOffsets() {
return hasOffsets;
}
@@ -394,11 +393,6 @@ public class SimpleTextTermVectorsReader
e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets, postings.payloads);
return e;
}
-
- @Override
- public Comparator<BytesRef> getComparator() {
- return BytesRef.getUTF8SortedAsUnicodeComparator();
- }
}
// note: these two enum classes are exactly like the Default impl...
@@ -538,4 +532,9 @@ public class SimpleTextTermVectorsReader
return 1;
}
}
+
+ @Override
+ public long ramBytesUsed() {
+ return 0;
+ }
}
Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java Mon Oct 21 18:58:24 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.codecs.simplet
*/
import java.io.IOException;
-import java.util.Comparator;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.index.FieldInfo;
@@ -189,11 +188,6 @@ public class SimpleTextTermVectorsWriter
}
}
- @Override
- public Comparator<BytesRef> getComparator() throws IOException {
- return BytesRef.getUTF8SortedAsUnicodeComparator();
- }
-
private void write(String s) throws IOException {
SimpleTextUtil.write(out, s, scratch);
}
Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat Mon Oct 21 18:58:24 2013
@@ -14,4 +14,5 @@
# limitations under the License.
org.apache.lucene.codecs.diskdv.DiskDocValuesFormat
+org.apache.lucene.codecs.memory.MemoryDocValuesFormat
org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat
Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat Mon Oct 21 18:58:24 2013
@@ -18,3 +18,7 @@ org.apache.lucene.codecs.simpletext.Simp
org.apache.lucene.codecs.memory.MemoryPostingsFormat
org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat
org.apache.lucene.codecs.memory.DirectPostingsFormat
+org.apache.lucene.codecs.memory.FSTPulsing41PostingsFormat
+org.apache.lucene.codecs.memory.FSTOrdPulsing41PostingsFormat
+org.apache.lucene.codecs.memory.FSTPostingsFormat
+org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
Modified: lucene/dev/branches/lucene4956/lucene/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/common-build.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/common-build.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/common-build.xml Mon Oct 21 18:58:24 2013
@@ -360,7 +360,7 @@
<property name="ivy_install_path" location="${user.home}/.ant/lib" />
<property name="ivy_bootstrap_url1" value="http://repo1.maven.org/maven2"/>
<!-- you might need to tweak this from china so it works -->
- <property name="ivy_bootstrap_url2" value="http://mirror.netcologne.de/maven2"/>
+ <property name="ivy_bootstrap_url2" value="http://uk.maven.org/maven2"/>
<property name="ivy_checksum_sha1" value="c5ebf1c253ad4959a29f4acfe696ee48cdd9f473"/>
<target name="ivy-availability-check" unless="ivy.available">
@@ -490,12 +490,41 @@
<jarify/>
</target>
+ <property name="lucene.tgz.file" location="${common.dir}/dist/lucene-${version}.tgz"/>
+ <available file="${lucene.tgz.file}" property="lucene.tgz.exists"/>
+ <property name="lucene.tgz.unpack.dir" location="${common.build.dir}/lucene.tgz.unpacked"/>
+ <patternset id="patternset.lucene.solr.jars">
+ <include name="**/lucene-*.jar"/>
+ <include name="**/solr-*.jar"/>
+ </patternset>
+ <available type="dir" file="${lucene.tgz.unpack.dir}" property="lucene.tgz.unpack.dir.exists"/>
+ <target name="-ensure-lucene-tgz-exists" unless="lucene.tgz.exists">
+ <ant dir="${common.dir}" target="package-tgz" inheritall="false"/>
+ </target>
+ <target name="-unpack-lucene-tgz" unless="lucene.tgz.unpack.dir.exists">
+ <antcall target="-ensure-lucene-tgz-exists" inheritall="false"/>
+ <mkdir dir="${lucene.tgz.unpack.dir}"/>
+ <untar compression="gzip" src="${lucene.tgz.file}" dest="${lucene.tgz.unpack.dir}">
+ <patternset refid="patternset.lucene.solr.jars"/>
+ </untar>
+ </target>
+ <property name="dist.jar.dir.prefix" value="${lucene.tgz.unpack.dir}/lucene"/>
+ <pathconvert property="dist.jar.dir.suffix">
+ <mapper>
+ <chainedmapper>
+ <globmapper from="${common.dir}*" to="*"/>
+ <globmapper from="*build.xml" to="*"/>
+ </chainedmapper>
+ </mapper>
+ <path location="${ant.file}"/>
+ </pathconvert>
+
<macrodef name="m2-deploy" description="Builds a Maven artifact">
<element name="artifact-attachments" optional="yes"/>
<element name="parent-poms" optional="yes"/>
<element name="credentials" optional="yes"/>
<attribute name="pom.xml"/>
- <attribute name="jar.file" default="${build.dir}/${final.name}.jar"/>
+ <attribute name="jar.file" default="${dist.jar.dir.prefix}-${version}/${dist.jar.dir.suffix}/${final.name}.jar"/>
<sequential>
<artifact:install-provider artifactId="wagon-ssh" version="1.0-beta-7"/>
<parent-poms/>
@@ -1235,7 +1264,7 @@ ${tests-output}/junit4-*.suites - pe
-->
<target name="clover" depends="-clover.disable,-clover.load,-clover.classpath,-clover.setup" description="Instrument the Unit tests using Clover. To use, specify -Drun.clover=true on the command line."/>
- <target name="-clover.load" if="run.clover" unless="clover.loaded">
+ <target name="-clover.load" depends="ivy-availability-check,ivy-configure" if="run.clover" unless="clover.loaded">
<available file="${clover.license.path}" property="clover.license.available" />
<fail unless="clover.license.available"><![CDATA[.
@@ -1256,8 +1285,8 @@ ${tests-output}/junit4-*.suites - pe
]]></fail>
<echo>Code coverage with Atlassian Clover enabled.</echo>
- <ivy:cachepath organisation="com.cenqua.clover" module="clover" revision="3.1.10"
- inline="true" conf="master" type="jar" pathid="clover.classpath"/>
+ <ivy:cachepath organisation="com.cenqua.clover" module="clover" revision="3.2.0-SNAPSHOT"
+ inline="true" conf="master" pathid="clover.classpath"/>
<taskdef resource="cloverlib.xml" classpathref="clover.classpath" />
<mkdir dir="${clover.db.dir}"/>
<!-- This is a hack, instead of setting "clover.loaded" to "true", we set it
@@ -1353,9 +1382,9 @@ ${tests-output}/junit4-*.suites - pe
</target>
<target name="dist-maven"
- depends="filter-pom-templates, install-maven-tasks, m2-deploy-lucene-parent-pom, dist-maven-common"/>
+ depends="filter-pom-templates, install-maven-tasks, m2-deploy-lucene-parent-pom, -unpack-lucene-tgz, dist-maven-common"/>
<target name="dist-maven-common"
- depends="jar-core, jar-src, javadocs, install-maven-tasks, filter-pom-templates">
+ depends="jar-src, javadocs, install-maven-tasks, filter-pom-templates">
<sequential>
<property name="top.level.dir" location="${common.dir}/.."/>
<pathconvert property="pom.xml">
@@ -1379,9 +1408,9 @@ ${tests-output}/junit4-*.suites - pe
</target>
<target name="dist-maven-src-java"
- depends="filter-pom-templates, install-maven-tasks, m2-deploy-lucene-parent-pom, dist-maven-common-src-java"/>
+ depends="filter-pom-templates, install-maven-tasks, m2-deploy-lucene-parent-pom, -unpack-lucene-tgz, dist-maven-common-src-java"/>
<target name="dist-maven-common-src-java"
- depends="jar-core, jar-src, javadocs, install-maven-tasks, filter-pom-templates">
+ depends="-unpack-lucene-tgz, jar-src, javadocs, install-maven-tasks, filter-pom-templates">
<sequential>
<property name="top.level.dir" location="${common.dir}/.."/>
<pathconvert property="pom.xml">
@@ -1498,7 +1527,7 @@ ${tests-output}/junit4-*.suites - pe
</target>
<target name="rat-sources-typedef" unless="rat.loaded">
- <ivy:cachepath organisation="org.apache.rat" module="apache-rat" revision="0.9" transitive="false" inline="true" conf="master" type="jar" pathid="rat.classpath"/>
+ <ivy:cachepath organisation="org.apache.rat" module="apache-rat" revision="0.10" transitive="false" inline="true" conf="master" type="jar" pathid="rat.classpath"/>
<typedef resource="org/apache/rat/anttasks/antlib.xml" uri="antlib:org.apache.rat.anttasks" classpathref="rat.classpath"/>
<property name="rat.loaded" value="true"/>
</target>
@@ -1567,6 +1596,8 @@ ${tests-output}/junit4-*.suites - pe
<pattern substring="This file was generated automatically by the Snowball to Java compiler"/>
<!-- uima tests generated by JCasGen -->
<pattern substring="First created by JCasGen"/>
+ <!-- parsers generated by antlr -->
+ <pattern substring="ANTLR GENERATED CODE"/>
</rat:substringMatcher>
<!-- built in approved licenses -->
@@ -2064,6 +2095,9 @@ ${tests-output}/junit4-*.suites - pe
<target name="resolve-groovy" unless="groovy.loaded" depends="ivy-availability-check,ivy-configure">
<ivy:cachepath organisation="org.codehaus.groovy" module="groovy-all" revision="2.1.5"
inline="true" conf="default" type="jar" transitive="true" pathid="groovy.classpath"/>
+ <taskdef name="groovy"
+ classname="org.codehaus.groovy.ant.Groovy"
+ classpathref="groovy.classpath"/>
<property name="groovy.loaded" value="true"/>
</target>
Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java Mon Oct 21 18:58:24 2013
@@ -72,12 +72,15 @@ public abstract class Analyzer implement
private final ReuseStrategy reuseStrategy;
+ // non final as it gets nulled if closed; pkg private for access by ReuseStrategy's final helper methods:
+ CloseableThreadLocal<Object> storedValue = new CloseableThreadLocal<Object>();
+
/**
* Create a new Analyzer, reusing the same set of components per-thread
* across calls to {@link #tokenStream(String, Reader)}.
*/
public Analyzer() {
- this(new GlobalReuseStrategy());
+ this(GLOBAL_REUSE_STRATEGY);
}
/**
@@ -133,11 +136,11 @@ public abstract class Analyzer implement
*/
public final TokenStream tokenStream(final String fieldName,
final Reader reader) throws IOException {
- TokenStreamComponents components = reuseStrategy.getReusableComponents(fieldName);
+ TokenStreamComponents components = reuseStrategy.getReusableComponents(this, fieldName);
final Reader r = initReader(fieldName, reader);
if (components == null) {
components = createComponents(fieldName, r);
- reuseStrategy.setReusableComponents(fieldName, components);
+ reuseStrategy.setReusableComponents(this, fieldName, components);
} else {
components.setReader(r);
}
@@ -167,7 +170,7 @@ public abstract class Analyzer implement
* @see #tokenStream(String, Reader)
*/
public final TokenStream tokenStream(final String fieldName, final String text) throws IOException {
- TokenStreamComponents components = reuseStrategy.getReusableComponents(fieldName);
+ TokenStreamComponents components = reuseStrategy.getReusableComponents(this, fieldName);
@SuppressWarnings("resource") final ReusableStringReader strReader =
(components == null || components.reusableStringReader == null) ?
new ReusableStringReader() : components.reusableStringReader;
@@ -175,7 +178,7 @@ public abstract class Analyzer implement
final Reader r = initReader(fieldName, strReader);
if (components == null) {
components = createComponents(fieldName, r);
- reuseStrategy.setReusableComponents(fieldName, components);
+ reuseStrategy.setReusableComponents(this, fieldName, components);
} else {
components.setReader(r);
}
@@ -229,10 +232,20 @@ public abstract class Analyzer implement
return 1;
}
+ /**
+ * Returns the used {@link ReuseStrategy}.
+ */
+ public final ReuseStrategy getReuseStrategy() {
+ return reuseStrategy;
+ }
+
/** Frees persistent resources used by this Analyzer */
@Override
public void close() {
- reuseStrategy.close();
+ if (storedValue != null) {
+ storedValue.close();
+ storedValue = null;
+ }
}
/**
@@ -317,126 +330,101 @@ public abstract class Analyzer implement
* Strategy defining how TokenStreamComponents are reused per call to
* {@link Analyzer#tokenStream(String, java.io.Reader)}.
*/
- public static abstract class ReuseStrategy implements Closeable {
-
- private CloseableThreadLocal<Object> storedValue = new CloseableThreadLocal<Object>();
+ public static abstract class ReuseStrategy {
/** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
public ReuseStrategy() {}
/**
- * Gets the reusable TokenStreamComponents for the field with the given name
+ * Gets the reusable TokenStreamComponents for the field with the given name.
*
+ * @param analyzer Analyzer from which to get the reused components. Use
+ * {@link #getStoredValue(Analyzer)} and {@link #setStoredValue(Analyzer, Object)}
+ * to access the data on the Analyzer.
* @param fieldName Name of the field whose reusable TokenStreamComponents
* are to be retrieved
* @return Reusable TokenStreamComponents for the field, or {@code null}
* if there was no previous components for the field
*/
- public abstract TokenStreamComponents getReusableComponents(String fieldName);
+ public abstract TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName);
/**
* Stores the given TokenStreamComponents as the reusable components for the
- * field with the give name
+ * field with the given name.
*
* @param fieldName Name of the field whose TokenStreamComponents are being set
* @param components TokenStreamComponents which are to be reused for the field
*/
- public abstract void setReusableComponents(String fieldName, TokenStreamComponents components);
+ public abstract void setReusableComponents(Analyzer analyzer, String fieldName, TokenStreamComponents components);
/**
- * Returns the currently stored value
+ * Returns the currently stored value.
*
* @return Currently stored value or {@code null} if no value is stored
- * @throws AlreadyClosedException if the ReuseStrategy is closed.
+ * @throws AlreadyClosedException if the Analyzer is closed.
*/
- protected final Object getStoredValue() {
- try {
- return storedValue.get();
- } catch (NullPointerException npe) {
- if (storedValue == null) {
- throw new AlreadyClosedException("this Analyzer is closed");
- } else {
- throw npe;
- }
+ protected final Object getStoredValue(Analyzer analyzer) {
+ if (analyzer.storedValue == null) {
+ throw new AlreadyClosedException("this Analyzer is closed");
}
+ return analyzer.storedValue.get();
}
/**
- * Sets the stored value
+ * Sets the stored value.
*
* @param storedValue Value to store
- * @throws AlreadyClosedException if the ReuseStrategy is closed.
+ * @throws AlreadyClosedException if the Analyzer is closed.
*/
- protected final void setStoredValue(Object storedValue) {
- try {
- this.storedValue.set(storedValue);
- } catch (NullPointerException npe) {
- if (storedValue == null) {
- throw new AlreadyClosedException("this Analyzer is closed");
- } else {
- throw npe;
- }
+ protected final void setStoredValue(Analyzer analyzer, Object storedValue) {
+ if (analyzer.storedValue == null) {
+ throw new AlreadyClosedException("this Analyzer is closed");
}
+ analyzer.storedValue.set(storedValue);
}
- /**
- * Closes the ReuseStrategy, freeing any resources
- */
- @Override
- public void close() {
- if (storedValue != null) {
- storedValue.close();
- storedValue = null;
- }
- }
}
/**
- * Implementation of {@link ReuseStrategy} that reuses the same components for
+ * A predefined {@link ReuseStrategy} that reuses the same components for
* every field.
*/
- public final static class GlobalReuseStrategy extends ReuseStrategy {
-
- /** Creates a new instance, with empty per-thread values */
- public GlobalReuseStrategy() {}
+ public static final ReuseStrategy GLOBAL_REUSE_STRATEGY = new ReuseStrategy() {
@Override
- public TokenStreamComponents getReusableComponents(String fieldName) {
- return (TokenStreamComponents) getStoredValue();
+ public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) {
+ return (TokenStreamComponents) getStoredValue(analyzer);
}
@Override
- public void setReusableComponents(String fieldName, TokenStreamComponents components) {
- setStoredValue(components);
+ public void setReusableComponents(Analyzer analyzer, String fieldName, TokenStreamComponents components) {
+ setStoredValue(analyzer, components);
}
- }
+ };
/**
- * Implementation of {@link ReuseStrategy} that reuses components per-field by
+ * A predefined {@link ReuseStrategy} that reuses components per-field by
* maintaining a Map of TokenStreamComponent per field name.
*/
- public static class PerFieldReuseStrategy extends ReuseStrategy {
-
- /** Creates a new instance, with empty per-thread-per-field values */
- public PerFieldReuseStrategy() {}
+ public static final ReuseStrategy PER_FIELD_REUSE_STRATEGY = new ReuseStrategy() {
@SuppressWarnings("unchecked")
@Override
- public TokenStreamComponents getReusableComponents(String fieldName) {
- Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
+ public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) {
+ Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue(analyzer);
return componentsPerField != null ? componentsPerField.get(fieldName) : null;
}
@SuppressWarnings("unchecked")
@Override
- public void setReusableComponents(String fieldName, TokenStreamComponents components) {
- Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
+ public void setReusableComponents(Analyzer analyzer, String fieldName, TokenStreamComponents components) {
+ Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue(analyzer);
if (componentsPerField == null) {
componentsPerField = new HashMap<String, TokenStreamComponents>();
- setStoredValue(componentsPerField);
+ setStoredValue(analyzer, componentsPerField);
}
componentsPerField.put(fieldName, components);
}
- }
+ };
}
Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java Mon Oct 21 18:58:24 2013
@@ -34,10 +34,27 @@ public abstract class AnalyzerWrapper ex
/**
* Creates a new AnalyzerWrapper. Since the {@link Analyzer.ReuseStrategy} of
- * the wrapped Analyzers are unknown, {@link Analyzer.PerFieldReuseStrategy} is assumed
+ * the wrapped Analyzers are unknown, {@link #PER_FIELD_REUSE_STRATEGY} is assumed.
+ * @deprecated Use {@link #AnalyzerWrapper(Analyzer.ReuseStrategy)}
+ * and specify a valid {@link Analyzer.ReuseStrategy}, probably retrieved from the
+ * wrapped analyzer using {@link #getReuseStrategy()}.
*/
+ @Deprecated
protected AnalyzerWrapper() {
- super(new PerFieldReuseStrategy());
+ this(PER_FIELD_REUSE_STRATEGY);
+ }
+
+ /**
+ * Creates a new AnalyzerWrapper with the given reuse strategy.
+ * <p>If you want to wrap a single delegate Analyzer you can probably
+ * reuse its strategy when instantiating this subclass:
+ * {@code super(delegate.getReuseStrategy());}.
+ * <p>If you choose different analyzers per field, use
+ * {@link #PER_FIELD_REUSE_STRATEGY}.
+ * @see #getReuseStrategy()
+ */
+ protected AnalyzerWrapper(ReuseStrategy reuseStrategy) {
+ super(reuseStrategy);
}
/**
Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java Mon Oct 21 18:58:24 2013
@@ -160,16 +160,10 @@ public final class NumericTokenStream ex
@Override
public int fillBytesRef() {
- try {
- assert valueSize == 64 || valueSize == 32;
- return (valueSize == 64) ?
- NumericUtils.longToPrefixCoded(value, shift, bytes) :
- NumericUtils.intToPrefixCoded((int) value, shift, bytes);
- } catch (IllegalArgumentException iae) {
- // return empty token before first or after last
- bytes.length = 0;
- return 0;
- }
+ assert valueSize == 64 || valueSize == 32;
+ return (valueSize == 64) ?
+ NumericUtils.longToPrefixCoded(value, shift, bytes) :
+ NumericUtils.intToPrefixCoded((int) value, shift, bytes);
}
@Override
Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java Mon Oct 21 18:58:24 2013
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.io.Closeable;
import java.lang.reflect.Modifier;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
@@ -159,23 +160,41 @@ public abstract class TokenStream extend
* setting the final offset of a stream. The final offset of a stream might
* differ from the offset of the last token eg in case one or more whitespaces
* followed after the last token, but a WhitespaceTokenizer was used.
+ * <p>
+ * Additionally any skipped positions (such as those removed by a stopfilter)
+ * can be applied to the position increment, or any adjustment of other
+ * attributes where the end-of-stream value may be important.
+ * <p>
+ * If you override this method, always call {@code super.end()}.
*
* @throws IOException If an I/O error occurs
*/
public void end() throws IOException {
- // do nothing by default
+ clearAttributes(); // LUCENE-3849: don't consume dirty atts
+ if (hasAttribute(PositionIncrementAttribute.class)) {
+ getAttribute(PositionIncrementAttribute.class).setPositionIncrement(0);
+ }
}
/**
* This method is called by a consumer before it begins consumption using
* {@link #incrementToken()}.
- * <p/>
+ * <p>
* Resets this stream to a clean state. Stateful implementations must implement
* this method so that they can be reused, just as if they had been created fresh.
+ * <p>
+ * If you override this method, always call {@code super.reset()}, otherwise
+ * some internal state will not be correctly reset (e.g., {@link Tokenizer} will
+ * throw {@link IllegalStateException} on further usage).
*/
public void reset() throws IOException {}
- /** Releases resources associated with this stream. */
+ /** Releases resources associated with this stream.
+ * <p>
+ * If you override this method, always call {@code super.close()}, otherwise
+ * some internal state will not be correctly reset (e.g., {@link Tokenizer} will
+ * throw {@link IllegalStateException} on reuse).
+ */
@Override
public void close() throws IOException {}