You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ha...@apache.org on 2013/08/13 06:06:27 UTC
svn commit: r1513336 [3/11] - in /lucene/dev/branches/lucene3069/lucene: ./
analysis/ analysis/common/
analysis/common/src/java/org/apache/lucene/analysis/charfilter/
analysis/common/src/java/org/apache/lucene/analysis/hunspell/
analysis/common/src/jav...
Modified: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java Tue Aug 13 04:06:18 2013
@@ -684,7 +684,7 @@ public final class DirectPostingsFormat
}
@Override
- public SeekStatus seekCeil(BytesRef term, boolean useCache) {
+ public SeekStatus seekCeil(BytesRef term) {
// TODO: we should use the skip pointers; should be
// faster than bin search; we should also hold
// & reuse current state so seeking forwards is
@@ -707,7 +707,7 @@ public final class DirectPostingsFormat
}
@Override
- public boolean seekExact(BytesRef term, boolean useCache) {
+ public boolean seekExact(BytesRef term) {
// TODO: we should use the skip pointers; should be
// faster than bin search; we should also hold
// & reuse current state so seeking forwards is
@@ -1413,7 +1413,7 @@ public final class DirectPostingsFormat
}
@Override
- public SeekStatus seekCeil(BytesRef term, boolean useCache) {
+ public SeekStatus seekCeil(BytesRef term) {
throw new UnsupportedOperationException();
}
Modified: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java Tue Aug 13 04:06:18 2013
@@ -335,11 +335,11 @@ public final class MemoryPostingsFormat
public FSTDocsEnum reset(BytesRef bufferIn, Bits liveDocs, int numDocs) {
assert numDocs > 0;
- if (buffer.length < bufferIn.length - bufferIn.offset) {
- buffer = ArrayUtil.grow(buffer, bufferIn.length - bufferIn.offset);
+ if (buffer.length < bufferIn.length) {
+ buffer = ArrayUtil.grow(buffer, bufferIn.length);
}
- in.reset(buffer, 0, bufferIn.length - bufferIn.offset);
- System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length - bufferIn.offset);
+ in.reset(buffer, 0, bufferIn.length);
+ System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length);
this.liveDocs = liveDocs;
docID = -1;
accum = 0;
@@ -472,11 +472,11 @@ public final class MemoryPostingsFormat
// System.out.println(" " + Integer.toHexString(bufferIn.bytes[i]&0xFF));
// }
- if (buffer.length < bufferIn.length - bufferIn.offset) {
- buffer = ArrayUtil.grow(buffer, bufferIn.length - bufferIn.offset);
+ if (buffer.length < bufferIn.length) {
+ buffer = ArrayUtil.grow(buffer, bufferIn.length);
}
in.reset(buffer, 0, bufferIn.length - bufferIn.offset);
- System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length - bufferIn.offset);
+ System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length);
this.liveDocs = liveDocs;
docID = -1;
accum = 0;
@@ -632,6 +632,7 @@ public final class MemoryPostingsFormat
private int docFreq;
private long totalTermFreq;
private BytesRefFSTEnum.InputOutput<BytesRef> current;
+ private BytesRef postingsSpare = new BytesRef();
public FSTTermsEnum(FieldInfo field, FST<BytesRef> fst) {
this.field = field;
@@ -640,21 +641,23 @@ public final class MemoryPostingsFormat
private void decodeMetaData() {
if (!didDecode) {
- buffer.reset(current.output.bytes, 0, current.output.length);
+ buffer.reset(current.output.bytes, current.output.offset, current.output.length);
docFreq = buffer.readVInt();
if (field.getIndexOptions() != IndexOptions.DOCS_ONLY) {
totalTermFreq = docFreq + buffer.readVLong();
} else {
totalTermFreq = -1;
}
- current.output.offset = buffer.getPosition();
+ postingsSpare.bytes = current.output.bytes;
+ postingsSpare.offset = buffer.getPosition();
+ postingsSpare.length = current.output.length - (buffer.getPosition() - current.output.offset);
//System.out.println(" df=" + docFreq + " totTF=" + totalTermFreq + " offset=" + buffer.getPosition() + " len=" + current.output.length);
didDecode = true;
}
}
@Override
- public boolean seekExact(BytesRef text, boolean useCache /* ignored */) throws IOException {
+ public boolean seekExact(BytesRef text) throws IOException {
//System.out.println("te.seekExact text=" + field.name + ":" + text.utf8ToString() + " this=" + this);
current = fstEnum.seekExact(text);
didDecode = false;
@@ -662,7 +665,7 @@ public final class MemoryPostingsFormat
}
@Override
- public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
+ public SeekStatus seekCeil(BytesRef text) throws IOException {
//System.out.println("te.seek text=" + field.name + ":" + text.utf8ToString() + " this=" + this);
current = fstEnum.seekCeil(text);
if (current == null) {
@@ -699,7 +702,7 @@ public final class MemoryPostingsFormat
docsEnum = new FSTDocsEnum(field.getIndexOptions(), field.hasPayloads());
}
}
- return docsEnum.reset(current.output, liveDocs, docFreq);
+ return docsEnum.reset(this.postingsSpare, liveDocs, docFreq);
}
@Override
@@ -720,7 +723,7 @@ public final class MemoryPostingsFormat
}
}
//System.out.println("D&P reset this=" + this);
- return docsAndPositionsEnum.reset(current.output, liveDocs, docFreq);
+ return docsAndPositionsEnum.reset(postingsSpare, liveDocs, docFreq);
}
@Override
Modified: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java Tue Aug 13 04:06:18 2013
@@ -103,8 +103,7 @@ public abstract class PulsingPostingsFor
state.directory, state.fieldInfos, state.segmentInfo,
pulsingReader,
state.context,
- state.segmentSuffix,
- state.termsIndexDivisor);
+ state.segmentSuffix);
success = true;
return ret;
} finally {
Modified: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java Tue Aug 13 04:06:18 2013
@@ -109,7 +109,7 @@ class SimpleTextFieldsReader extends Fie
}
@Override
- public boolean seekExact(BytesRef text, boolean useCache /* ignored */) throws IOException {
+ public boolean seekExact(BytesRef text) throws IOException {
final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekExact(text);
if (result != null) {
@@ -125,7 +125,7 @@ class SimpleTextFieldsReader extends Fie
}
@Override
- public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
+ public SeekStatus seekCeil(BytesRef text) throws IOException {
//System.out.println("seek to text=" + text.utf8ToString());
final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekCeil(text);
Modified: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java Tue Aug 13 04:06:18 2013
@@ -331,7 +331,7 @@ public class SimpleTextTermVectorsReader
}
@Override
- public SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException {
+ public SeekStatus seekCeil(BytesRef text) throws IOException {
iterator = terms.tailMap(text).entrySet().iterator();
if (!iterator.hasNext()) {
return SeekStatus.END;
Modified: lucene/dev/branches/lucene3069/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java Tue Aug 13 04:06:18 2013
@@ -25,10 +25,8 @@ import org.apache.lucene.util._TestUtil;
/**
* Basic tests of a PF using FixedGap terms dictionary
*/
-// TODO: we should add an instantiation for VarGap too to TestFramework, and a test in this package
-// TODO: ensure both of these are also in rotation in RandomCodec
public class TestFixedGapPostingsFormat extends BasePostingsFormatTestCase {
- private final Codec codec = _TestUtil.alwaysPostingsFormat(new Lucene41WithOrds());
+ private final Codec codec = _TestUtil.alwaysPostingsFormat(new Lucene41WithOrds(_TestUtil.nextInt(random(), 1, 1000)));
@Override
protected Codec getCodec() {
Modified: lucene/dev/branches/lucene3069/lucene/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/common-build.xml?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/common-build.xml (original)
+++ lucene/dev/branches/lucene3069/lucene/common-build.xml Tue Aug 13 04:06:18 2013
@@ -230,9 +230,6 @@
<property name="svn.exe" value="svn" />
<property name="perl.exe" value="perl" />
- <property name="hg.exe" value="hg" />
- <property name="moman.url" value="https://bitbucket.org/jpbarrette/moman" />
- <property name="moman.rev" value="120" />
<property name="python.exe" value="python" />
<property name="python32.exe" value="python3.2" />
@@ -445,20 +442,6 @@
</sequential>
</macrodef>
- <target name="jflex-uptodate-check">
- <uptodate property="jflex.files.uptodate">
- <srcfiles dir="${src.dir}" includes="**/*.jflex" />
- <mapper type="glob" from="*.jflex" to="*.java"/>
- </uptodate>
- </target>
-
- <target name="jflex-notice" depends="jflex-uptodate-check" unless="jflex.files.uptodate">
- <echo>
- One or more of the JFlex .jflex files is newer than its corresponding
- .java file. Run the "jflex" target to regenerate the artifacts.
- </echo>
- </target>
-
<target name="jflex-check">
<available property="jflex.present" classname="jflex.anttask.JFlexTask">
<classpath refid="jflex.classpath"/>
@@ -471,7 +454,7 @@
Please install the jFlex 1.5 version (currently not released)
from its SVN repository:
- svn co http://jflex.svn.sourceforge.net/svnroot/jflex/trunk jflex
+ svn co -r 623 http://jflex.svn.sourceforge.net/svnroot/jflex/trunk jflex
cd jflex
mvn install
@@ -2100,6 +2083,8 @@ ${tests-output}/junit4-*.suites - pe
<property name="pegdown.loaded" value="true"/>
</target>
+ <target name="regenerate"/>
+
<macrodef name="pegdown">
<attribute name="todir"/>
<attribute name="flatten" default="false"/>
Modified: lucene/dev/branches/lucene3069/lucene/core/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/build.xml?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/build.xml (original)
+++ lucene/dev/branches/lucene3069/lucene/core/build.xml Tue Aug 13 04:06:18 2013
@@ -24,6 +24,9 @@
<import file="../common-build.xml"/>
+ <property name="moman.commit-hash" value="5c5c2a1e4dea" />
+ <property name="moman.url" value="https://bitbucket.org/jpbarrette/moman/get/${moman.commit-hash}.zip" />
+
<path id="classpath"/>
<path id="test.classpath">
@@ -109,36 +112,24 @@
<fixcrlf srcdir="src/java/org/apache/lucene/util/packed" includes="BulkOperation*.java,Direct*.java,Packed64SingleBlock.java,Packed*ThreeBlocks.py" encoding="UTF-8"/>
</target>
- <target name="createLevAutomata" depends="check-moman,clone-moman,pull-moman">
+ <target name="createLevAutomata" depends="check-moman,download-moman">
<createLevAutomaton n="1"/>
<createLevAutomaton n="2"/>
</target>
<target name="check-moman">
- <condition property="moman.cloned">
- <available file="${build.dir}/moman"/>
- </condition>
+ <available file="${build.dir}/moman" property="moman.downloaded"/>
</target>
- <target name="clone-moman" unless="moman.cloned">
- <mkdir dir="${build.dir}"/>
- <exec dir="${build.dir}"
- executable="${hg.exe}" failonerror="true">
- <arg value="clone"/>
- <arg value="-r"/>
- <arg value="${moman.rev}"/>
- <arg value="${moman.url}"/>
- <arg value="moman"/>
- </exec>
+ <target name="download-moman" unless="moman.downloaded">
+ <mkdir dir="${build.dir}/moman"/>
+ <get src="${moman.url}" dest="${build.dir}/moman.zip"/>
+ <unzip dest="${build.dir}/moman" src="${build.dir}/moman.zip">
+ <cutdirsmapper dirs="1"/>
+ </unzip>
+ <delete file="${build.dir}/moman.zip"/>
</target>
- <target name="pull-moman" if="moman.cloned">
- <exec dir="${build.dir}/moman"
- executable="${hg.exe}" failonerror="true">
- <arg value="pull"/>
- <arg value="-f"/>
- <arg value="-r"/>
- <arg value="${moman.rev}"/>
- </exec>
- </target>
+ <target name="regenerate" depends="createLevAutomata,createPackedIntSources"/>
+
</project>
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java Tue Aug 13 04:06:18 2013
@@ -51,16 +51,35 @@ public abstract class AnalyzerWrapper ex
/**
* Wraps / alters the given TokenStreamComponents, taken from the wrapped
- * Analyzer, to form new components. It is through this method that new
- * TokenFilters can be added by AnalyzerWrappers.
- *
- *
- * @param fieldName Name of the field which is to be analyzed
- * @param components TokenStreamComponents taken from the wrapped Analyzer
+ * Analyzer, to form new components. It is through this method that new
+ * TokenFilters can be added by AnalyzerWrappers. By default, the given
+ * components are returned.
+ *
+ * @param fieldName
+ * Name of the field which is to be analyzed
+ * @param components
+ * TokenStreamComponents taken from the wrapped Analyzer
* @return Wrapped / altered TokenStreamComponents.
*/
- protected abstract TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components);
+ protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+ return components;
+ }
+ /**
+ * Wraps / alters the given Reader. Through this method AnalyzerWrappers can
+ * implement {@link #initReader(String, Reader)}. By default, the given reader
+ * is returned.
+ *
+ * @param fieldName
+ * name of the field which is to be analyzed
+ * @param reader
+ * the reader to wrap
+ * @return the wrapped reader
+ */
+ protected Reader wrapReader(String fieldName, Reader reader) {
+ return reader;
+ }
+
@Override
protected final TokenStreamComponents createComponents(String fieldName, Reader aReader) {
return wrapComponents(fieldName, getWrappedAnalyzer(fieldName).createComponents(fieldName, aReader));
@@ -78,6 +97,6 @@ public abstract class AnalyzerWrapper ex
@Override
public final Reader initReader(String fieldName, Reader reader) {
- return getWrappedAnalyzer(fieldName).initReader(fieldName, reader);
+ return getWrappedAnalyzer(fieldName).initReader(fieldName, wrapReader(fieldName, reader));
}
}
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java Tue Aug 13 04:06:18 2013
@@ -32,7 +32,8 @@ import org.apache.lucene.util.automaton.
// TODO: maybe also toFST? then we can translate atts into FST outputs/weights
/** Consumes a TokenStream and creates an {@link Automaton}
- * where the transition labels are UTF8 bytes from the {@link
+ * where the transition labels are UTF8 bytes (or Unicode
+ * code points if unicodeArcs is true) from the {@link
* TermToBytesRefAttribute}. Between tokens we insert
* POS_SEP and for holes we insert HOLE.
*
@@ -40,6 +41,7 @@ import org.apache.lucene.util.automaton.
public class TokenStreamToAutomaton {
private boolean preservePositionIncrements;
+ private boolean unicodeArcs;
/** Sole constructor. */
public TokenStreamToAutomaton() {
@@ -51,6 +53,12 @@ public class TokenStreamToAutomaton {
this.preservePositionIncrements = enablePositionIncrements;
}
+ /** Whether to make transition labels Unicode code points instead of UTF8 bytes,
+ * <code>false</code> by default */
+ public void setUnicodeArcs(boolean unicodeArcs) {
+ this.unicodeArcs = unicodeArcs;
+ }
+
private static class Position implements RollingBuffer.Resettable {
// Any tokens that ended at our position arrive to this state:
State arriving;
@@ -80,15 +88,16 @@ public class TokenStreamToAutomaton {
}
/** We create transition between two adjacent tokens. */
- public static final int POS_SEP = 256;
+ public static final int POS_SEP = 0x001f;
/** We add this arc to represent a hole. */
- public static final int HOLE = 257;
+ public static final int HOLE = 0x001e;
/** Pulls the graph (including {@link
* PositionLengthAttribute}) from the provided {@link
* TokenStream}, and creates the corresponding
- * automaton where arcs are bytes from each term. */
+ * automaton where arcs are bytes (or Unicode code points
+ * if unicodeArcs = true) from each term. */
public Automaton toAutomaton(TokenStream in) throws IOException {
final Automaton a = new Automaton();
boolean deterministic = true;
@@ -156,16 +165,34 @@ public class TokenStreamToAutomaton {
final int endPos = pos + posLengthAtt.getPositionLength();
termBytesAtt.fillBytesRef();
- final BytesRef term2 = changeToken(term);
+ final BytesRef termUTF8 = changeToken(term);
+ int[] termUnicode = null;
final Position endPosData = positions.get(endPos);
if (endPosData.arriving == null) {
endPosData.arriving = new State();
}
State state = posData.leaving;
- for(int byteIDX=0;byteIDX<term2.length;byteIDX++) {
- final State nextState = byteIDX == term2.length-1 ? endPosData.arriving : new State();
- state.addTransition(new Transition(term2.bytes[term2.offset + byteIDX] & 0xff, nextState));
+ int termLen;
+ if (unicodeArcs) {
+ final String utf16 = termUTF8.utf8ToString();
+ termUnicode = new int[utf16.codePointCount(0, utf16.length())];
+ termLen = termUnicode.length;
+ for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp))
+ termUnicode[j++] = cp = utf16.codePointAt(i);
+ } else {
+ termLen = termUTF8.length;
+ }
+
+ for(int byteIDX=0;byteIDX<termLen;byteIDX++) {
+ final State nextState = byteIDX == termLen-1 ? endPosData.arriving : new State();
+ int c;
+ if (unicodeArcs) {
+ c = termUnicode[byteIDX];
+ } else {
+ c = termUTF8.bytes[termUTF8.offset + byteIDX] & 0xff;
+ }
+ state.addTransition(new Transition(c, nextState));
state = nextState;
}
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java Tue Aug 13 04:06:18 2013
@@ -67,9 +67,9 @@ import org.apache.lucene.util.fst.Util;
* does not support a pluggable terms index
* implementation).
*
- * <p><b>NOTE</b>: this terms dictionary does not support
- * index divisor when opening an IndexReader. Instead, you
- * can change the min/maxItemsPerBlock during indexing.</p>
+ * <p><b>NOTE</b>: this terms dictionary supports
+ * min/maxItemsPerBlock during indexing to control how
+ * much memory the terms index uses.</p>
*
* <p>The data structure used by this implementation is very
* similar to a burst trie
@@ -112,7 +112,7 @@ public class BlockTreeTermsReader extend
/** Sole constructor. */
public BlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info,
PostingsReaderBase postingsReader, IOContext ioContext,
- String segmentSuffix, int indexDivisor)
+ String segmentSuffix)
throws IOException {
this.postingsReader = postingsReader;
@@ -126,13 +126,11 @@ public class BlockTreeTermsReader extend
try {
version = readHeader(in);
- if (indexDivisor != -1) {
- indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION),
+ indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION),
ioContext);
- int indexVersion = readIndexHeader(indexIn);
- if (indexVersion != version) {
- throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion);
- }
+ int indexVersion = readIndexHeader(indexIn);
+ if (indexVersion != version) {
+ throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion);
}
// Have PostingsReader init itself
@@ -140,9 +138,7 @@ public class BlockTreeTermsReader extend
// Read per-field details
seekDir(in, dirOffset);
- if (indexDivisor != -1) {
- seekDir(indexIn, indexDirOffset);
- }
+ seekDir(indexIn, indexDirOffset);
final int numFields = in.readVInt();
if (numFields < 0) {
@@ -171,15 +167,13 @@ public class BlockTreeTermsReader extend
if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
}
- final long indexStartFP = indexDivisor != -1 ? indexIn.readVLong() : 0;
+ final long indexStartFP = indexIn.readVLong();
FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, indexIn));
if (previous != null) {
throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
}
}
- if (indexDivisor != -1) {
- indexIn.close();
- }
+ indexIn.close();
success = true;
} finally {
@@ -1222,7 +1216,7 @@ public class BlockTreeTermsReader extend
}
@Override
- public boolean seekExact(BytesRef text, boolean useCache) {
+ public boolean seekExact(BytesRef text) {
throw new UnsupportedOperationException();
}
@@ -1237,7 +1231,7 @@ public class BlockTreeTermsReader extend
}
@Override
- public SeekStatus seekCeil(BytesRef text, boolean useCache) {
+ public SeekStatus seekCeil(BytesRef text) {
throw new UnsupportedOperationException();
}
}
@@ -1499,7 +1493,7 @@ public class BlockTreeTermsReader extend
}
@Override
- public boolean seekExact(final BytesRef target, final boolean useCache) throws IOException {
+ public boolean seekExact(final BytesRef target) throws IOException {
if (index == null) {
throw new IllegalStateException("terms index was not loaded");
@@ -1713,7 +1707,6 @@ public class BlockTreeTermsReader extend
if (arc.output != NO_OUTPUT) {
output = fstOutputs.add(output, arc.output);
}
-
// if (DEBUG) {
// System.out.println(" index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
// }
@@ -1760,7 +1753,7 @@ public class BlockTreeTermsReader extend
}
@Override
- public SeekStatus seekCeil(final BytesRef target, final boolean useCache) throws IOException {
+ public SeekStatus seekCeil(final BytesRef target) throws IOException {
if (index == null) {
throw new IllegalStateException("terms index was not loaded");
}
@@ -2096,7 +2089,7 @@ public class BlockTreeTermsReader extend
// this method catches up all internal state so next()
// works properly:
//if (DEBUG) System.out.println(" re-seek to pending term=" + term.utf8ToString() + " " + term);
- final boolean result = seekExact(term, false);
+ final boolean result = seekExact(term);
assert result;
}
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java Tue Aug 13 04:06:18 2013
@@ -824,7 +824,7 @@ public final class CompressingTermVector
}
@Override
- public SeekStatus seekCeil(BytesRef text, boolean useCache)
+ public SeekStatus seekCeil(BytesRef text)
throws IOException {
if (ord < numTerms && ord >= 0) {
final int cmp = term().compareTo(text);
@@ -851,16 +851,7 @@ public final class CompressingTermVector
@Override
public void seekExact(long ord) throws IOException {
- if (ord < -1 || ord >= numTerms) {
- throw new IOException("ord is out of range: ord=" + ord + ", numTerms=" + numTerms);
- }
- if (ord < this.ord) {
- reset();
- }
- for (int i = this.ord; i < ord; ++i) {
- next();
- }
- assert ord == this.ord();
+ throw new UnsupportedOperationException();
}
@Override
@@ -870,7 +861,7 @@ public final class CompressingTermVector
@Override
public long ord() throws IOException {
- return ord;
+ throw new UnsupportedOperationException();
}
@Override
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java Tue Aug 13 04:06:18 2013
@@ -258,8 +258,7 @@ public class Lucene40PostingsFormat exte
state.segmentInfo,
postings,
state.context,
- state.segmentSuffix,
- state.termsIndexDivisor);
+ state.segmentSuffix);
success = true;
return ret;
} finally {
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java Tue Aug 13 04:06:18 2013
@@ -433,7 +433,7 @@ public class Lucene40TermVectorsReader e
// NOTE: slow! (linear scan)
@Override
- public SeekStatus seekCeil(BytesRef text, boolean useCache)
+ public SeekStatus seekCeil(BytesRef text)
throws IOException {
if (nextTerm != 0) {
final int cmp = text.compareTo(term);
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java Tue Aug 13 04:06:18 2013
@@ -439,8 +439,7 @@ public final class Lucene41PostingsForma
state.segmentInfo,
postingsReader,
state.context,
- state.segmentSuffix,
- state.termsIndexDivisor);
+ state.segmentSuffix);
success = true;
return ret;
} finally {
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java Tue Aug 13 04:06:18 2013
@@ -490,7 +490,7 @@ class Lucene42DocValuesProducer extends
}
@Override
- public SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException {
+ public SeekStatus seekCeil(BytesRef text) throws IOException {
if (in.seekCeil(text) == null) {
return SeekStatus.END;
} else if (term().equals(text)) {
@@ -503,7 +503,7 @@ class Lucene42DocValuesProducer extends
}
@Override
- public boolean seekExact(BytesRef text, boolean useCache) throws IOException {
+ public boolean seekExact(BytesRef text) throws IOException {
if (in.seekExact(text) == null) {
return false;
} else {
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java Tue Aug 13 04:06:18 2013
@@ -437,8 +437,7 @@ public final class TempBlockPostingsForm
state.segmentInfo,
postingsReader,
state.context,
- state.segmentSuffix,
- state.termsIndexDivisor);
+ state.segmentSuffix);
success = true;
return ret;
} finally {
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java Tue Aug 13 04:06:18 2013
@@ -117,7 +117,7 @@ public class TempBlockTermsReader extend
/** Sole constructor. */
public TempBlockTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info,
TempPostingsReaderBase postingsReader, IOContext ioContext,
- String segmentSuffix, int indexDivisor)
+ String segmentSuffix)
throws IOException {
this.postingsReader = postingsReader;
@@ -131,13 +131,11 @@ public class TempBlockTermsReader extend
try {
version = readHeader(in);
- if (indexDivisor != -1) {
- indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, TempBlockTermsWriter.TERMS_INDEX_EXTENSION),
+ indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, TempBlockTermsWriter.TERMS_INDEX_EXTENSION),
ioContext);
- int indexVersion = readIndexHeader(indexIn);
- if (indexVersion != version) {
- throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion);
- }
+ int indexVersion = readIndexHeader(indexIn);
+ if (indexVersion != version) {
+ throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion);
}
// Have PostingsReader init itself
@@ -145,9 +143,7 @@ public class TempBlockTermsReader extend
// Read per-field details
seekDir(in, dirOffset);
- if (indexDivisor != -1) {
- seekDir(indexIn, indexDirOffset);
- }
+ seekDir(indexIn, indexDirOffset);
final int numFields = in.readVInt();
if (numFields < 0) {
@@ -177,15 +173,13 @@ public class TempBlockTermsReader extend
if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
}
- final long indexStartFP = indexDivisor != -1 ? indexIn.readVLong() : 0;
+ final long indexStartFP = indexIn.readVLong();
FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, longsSize, indexIn));
if (previous != null) {
throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
}
}
- if (indexDivisor != -1) {
- indexIn.close();
- }
+ indexIn.close();
success = true;
} finally {
@@ -1251,7 +1245,7 @@ public class TempBlockTermsReader extend
}
@Override
- public boolean seekExact(BytesRef text, boolean useCache) {
+ public boolean seekExact(BytesRef text) {
throw new UnsupportedOperationException();
}
@@ -1266,7 +1260,7 @@ public class TempBlockTermsReader extend
}
@Override
- public SeekStatus seekCeil(BytesRef text, boolean useCache) {
+ public SeekStatus seekCeil(BytesRef text) {
throw new UnsupportedOperationException();
}
}
@@ -1528,7 +1522,7 @@ public class TempBlockTermsReader extend
}
@Override
- public boolean seekExact(final BytesRef target, final boolean useCache) throws IOException {
+ public boolean seekExact(final BytesRef target) throws IOException {
if (index == null) {
throw new IllegalStateException("terms index was not loaded");
@@ -1789,7 +1783,7 @@ public class TempBlockTermsReader extend
}
@Override
- public SeekStatus seekCeil(final BytesRef target, final boolean useCache) throws IOException {
+ public SeekStatus seekCeil(final BytesRef target) throws IOException {
if (index == null) {
throw new IllegalStateException("terms index was not loaded");
}
@@ -2125,7 +2119,7 @@ public class TempBlockTermsReader extend
// this method catches up all internal state so next()
// works properly:
//if (DEBUG) System.out.println(" re-seek to pending term=" + term.utf8ToString() + " " + term);
- final boolean result = seekExact(term, false);
+ final boolean result = seekExact(term);
assert result;
}
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java Tue Aug 13 04:06:18 2013
@@ -461,7 +461,7 @@ public class TempFSTOrdTermsReader exten
public BytesRef next() throws IOException {
if (seekPending) { // previously positioned, but termOutputs not fetched
seekPending = false;
- SeekStatus status = seekCeil(term, false);
+ SeekStatus status = seekCeil(term);
assert status == SeekStatus.FOUND; // must be positioned on a valid term
}
updateEnum(fstEnum.next());
@@ -469,13 +469,13 @@ public class TempFSTOrdTermsReader exten
}
@Override
- public boolean seekExact(BytesRef target, boolean useCache) throws IOException {
+ public boolean seekExact(BytesRef target) throws IOException {
updateEnum(fstEnum.seekExact(target));
return term != null;
}
@Override
- public SeekStatus seekCeil(BytesRef target, boolean useCache) throws IOException {
+ public SeekStatus seekCeil(BytesRef target) throws IOException {
updateEnum(fstEnum.seekCeil(target));
if (term == null) {
return SeekStatus.END;
@@ -587,17 +587,9 @@ public class TempFSTOrdTermsReader exten
super.decodeStats();
}
- // nocommit: need testcase for this
@Override
- public SeekStatus seekCeil(BytesRef target, boolean useCache) throws IOException {
- decoded = false;
- term = doSeekCeil(target);
- decodeStats();
- if (term == null) {
- return SeekStatus.END;
- } else {
- return term.equals(target) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
- }
+ public SeekStatus seekCeil(BytesRef target) throws IOException {
+ throw new UnsupportedOperationException();
}
@Override
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsReader.java Tue Aug 13 04:06:18 2013
@@ -348,7 +348,7 @@ public class TempFSTTermsReader extends
public BytesRef next() throws IOException {
if (seekPending) { // previously positioned, but termOutputs not fetched
seekPending = false;
- SeekStatus status = seekCeil(term, false);
+ SeekStatus status = seekCeil(term);
assert status == SeekStatus.FOUND; // must be positioned on a valid term
}
updateEnum(fstEnum.next());
@@ -356,13 +356,13 @@ public class TempFSTTermsReader extends
}
@Override
- public boolean seekExact(BytesRef target, boolean useCache) throws IOException {
+ public boolean seekExact(BytesRef target) throws IOException {
updateEnum(fstEnum.seekExact(target));
return term != null;
}
@Override
- public SeekStatus seekCeil(BytesRef target, boolean useCache) throws IOException {
+ public SeekStatus seekCeil(BytesRef target) throws IOException {
updateEnum(fstEnum.seekCeil(target));
if (term == null) {
return SeekStatus.END;
@@ -497,7 +497,7 @@ public class TempFSTTermsReader extends
}
@Override
- public SeekStatus seekCeil(BytesRef target, boolean useCache) throws IOException {
+ public SeekStatus seekCeil(BytesRef target) throws IOException {
decoded = false;
term = doSeekCeil(target);
loadMetaData();
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java Tue Aug 13 04:06:18 2013
@@ -78,7 +78,7 @@ public abstract class AtomicReader exten
return 0;
}
final TermsEnum termsEnum = terms.iterator(null);
- if (termsEnum.seekExact(term.bytes(), true)) {
+ if (termsEnum.seekExact(term.bytes())) {
return termsEnum.docFreq();
} else {
return 0;
@@ -101,7 +101,7 @@ public abstract class AtomicReader exten
return 0;
}
final TermsEnum termsEnum = terms.iterator(null);
- if (termsEnum.seekExact(term.bytes(), true)) {
+ if (termsEnum.seekExact(term.bytes())) {
return termsEnum.totalTermFreq();
} else {
return 0;
@@ -156,7 +156,7 @@ public abstract class AtomicReader exten
final Terms terms = fields.terms(term.field());
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
- if (termsEnum.seekExact(term.bytes(), true)) {
+ if (termsEnum.seekExact(term.bytes())) {
return termsEnum.docs(getLiveDocs(), null);
}
}
@@ -176,7 +176,7 @@ public abstract class AtomicReader exten
final Terms terms = fields.terms(term.field());
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
- if (termsEnum.seekExact(term.bytes(), true)) {
+ if (termsEnum.seekExact(term.bytes())) {
return termsEnum.docsAndPositions(getLiveDocs(), null);
}
}
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java Tue Aug 13 04:06:18 2013
@@ -26,7 +26,8 @@ import org.apache.lucene.util.ByteBlockP
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.packed.AppendingLongBuffer;
+import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
@@ -36,14 +37,14 @@ import static org.apache.lucene.util.Byt
class BinaryDocValuesWriter extends DocValuesWriter {
private final ByteBlockPool pool;
- private final AppendingLongBuffer lengths;
+ private final AppendingDeltaPackedLongBuffer lengths;
private final FieldInfo fieldInfo;
private int addedValues = 0;
public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
this.fieldInfo = fieldInfo;
this.pool = new ByteBlockPool(new DirectTrackingAllocator(iwBytesUsed));
- this.lengths = new AppendingLongBuffer();
+ this.lengths = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
}
public void addValue(int docID, BytesRef value) {
@@ -90,7 +91,7 @@ class BinaryDocValuesWriter extends DocV
// iterates over the values we have in ram
private class BytesIterator implements Iterator<BytesRef> {
final BytesRef value = new BytesRef();
- final AppendingLongBuffer.Iterator lengthsIterator = lengths.iterator();
+ final AppendingDeltaPackedLongBuffer.Iterator lengthsIterator = lengths.iterator();
final int size = (int) lengths.size();
final int maxDoc;
int upto;
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java Tue Aug 13 04:06:18 2013
@@ -33,9 +33,9 @@ import org.apache.lucene.util.RamUsageEs
* deletes are pushed (on flush in DocumentsWriter), these
* deletes are converted to a FrozenDeletes instance. */
-// NOTE: we are sync'd by BufferedDeletes, ie, all access to
-// instances of this class is via sync'd methods on
-// BufferedDeletes
+// NOTE: instances of this class are accessed either via a private
+// instance on DocumentsWriterPerThread, or via sync'd code by
+// DocumentsWriterDeleteQueue
class BufferedDeletes {
@@ -136,6 +136,9 @@ class BufferedDeletes {
}
terms.put(term, Integer.valueOf(docIDUpto));
+ // note that if current != null then it means there's already a buffered
+ // delete on that term, therefore we seem to over-count. this over-counting
+ // is done to respect IndexWriterConfig.setMaxBufferedDeleteTerms.
numTermDeletes.incrementAndGet();
if (current == null) {
bytesUsed.addAndGet(BYTES_PER_DEL_TERM + term.bytes.length + (RamUsageEstimator.NUM_BYTES_CHAR * term.field().length()));
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java Tue Aug 13 04:06:18 2013
@@ -390,7 +390,7 @@ class BufferedDeletesStream {
// System.out.println(" term=" + term);
- if (termsEnum.seekExact(term.bytes(), false)) {
+ if (termsEnum.seekExact(term.bytes())) {
// we don't need term frequencies for this
DocsEnum docsEnum = termsEnum.docs(rld.getLiveDocs(), docs, DocsEnum.FLAG_NONE);
//System.out.println("BDS: got docsEnum=" + docsEnum);
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Tue Aug 13 04:06:18 2013
@@ -30,8 +30,8 @@ import java.util.Map;
import org.apache.lucene.codecs.BlockTreeTermsReader;
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.PostingsFormat; // javadocs
-import org.apache.lucene.document.FieldType; // for javadocs
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
@@ -159,19 +159,6 @@ public class CheckIndex {
* segment. */
public double sizeMB;
- /** Doc store offset, if this segment shares the doc
- * store files (stored fields and term vectors) with
- * other segments. This is -1 if it does not share. */
- public int docStoreOffset = -1;
-
- /** String of the shared doc store segment, or null if
- * this segment does not share the doc store files. */
- public String docStoreSegment;
-
- /** True if the shared doc store files are compound file
- * format. */
- public boolean docStoreCompoundFile;
-
/** True if this segment has pending deletions. */
public boolean hasDeletions;
@@ -297,10 +284,21 @@ public class CheckIndex {
DocValuesStatus() {
}
- /** Number of documents tested. */
- public int docCount;
/** Total number of docValues tested. */
public long totalValueFields;
+
+ /** Total number of numeric fields */
+ public long totalNumericFields;
+
+ /** Total number of binary fields */
+ public long totalBinaryFields;
+
+ /** Total number of sorted fields */
+ public long totalSortedFields;
+
+ /** Total number of sortedset fields */
+ public long totalSortedSetFields;
+
/** Exception thrown during doc values test (null on success) */
public Throwable error = null;
}
@@ -535,7 +533,7 @@ public class CheckIndex {
}
if (infoStream != null)
infoStream.print(" test: open reader.........");
- reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.DEFAULT);
+ reader = new SegmentReader(info, IOContext.DEFAULT);
segInfoStat.openReaderPassed = true;
@@ -1117,7 +1115,7 @@ public class CheckIndex {
long totDocCountNoDeletes = 0;
long totDocFreq = 0;
for(int i=0;i<seekCount;i++) {
- if (!termsEnum.seekExact(seekTerms[i], true)) {
+ if (!termsEnum.seekExact(seekTerms[i])) {
throw new RuntimeException("seek to existing term " + seekTerms[i] + " failed");
}
@@ -1272,7 +1270,7 @@ public class CheckIndex {
for (FieldInfo fieldInfo : reader.getFieldInfos()) {
if (fieldInfo.hasDocValues()) {
status.totalValueFields++;
- checkDocValues(fieldInfo, reader, infoStream);
+ checkDocValues(fieldInfo, reader, infoStream, status);
} else {
if (reader.getBinaryDocValues(fieldInfo.name) != null ||
reader.getNumericDocValues(fieldInfo.name) != null ||
@@ -1283,7 +1281,11 @@ public class CheckIndex {
}
}
- msg(infoStream, "OK [" + status.docCount + " total doc count; " + status.totalValueFields + " docvalues fields]");
+ msg(infoStream, "OK [" + status.totalValueFields + " docvalues fields; "
+ + status.totalBinaryFields + " BINARY; "
+ + status.totalNumericFields + " NUMERIC; "
+ + status.totalSortedFields + " SORTED; "
+ + status.totalSortedSetFields + " SORTED_SET]");
} catch (Throwable e) {
msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]");
status.error = e;
@@ -1382,9 +1384,10 @@ public class CheckIndex {
}
}
- private static void checkDocValues(FieldInfo fi, AtomicReader reader, PrintStream infoStream) throws Exception {
+ private static void checkDocValues(FieldInfo fi, AtomicReader reader, PrintStream infoStream, DocValuesStatus status) throws Exception {
switch(fi.getDocValuesType()) {
case SORTED:
+ status.totalSortedFields++;
checkSortedDocValues(fi.name, reader, reader.getSortedDocValues(fi.name));
if (reader.getBinaryDocValues(fi.name) != null ||
reader.getNumericDocValues(fi.name) != null ||
@@ -1393,6 +1396,7 @@ public class CheckIndex {
}
break;
case SORTED_SET:
+ status.totalSortedSetFields++;
checkSortedSetDocValues(fi.name, reader, reader.getSortedSetDocValues(fi.name));
if (reader.getBinaryDocValues(fi.name) != null ||
reader.getNumericDocValues(fi.name) != null ||
@@ -1401,6 +1405,7 @@ public class CheckIndex {
}
break;
case BINARY:
+ status.totalBinaryFields++;
checkBinaryDocValues(fi.name, reader, reader.getBinaryDocValues(fi.name));
if (reader.getNumericDocValues(fi.name) != null ||
reader.getSortedDocValues(fi.name) != null ||
@@ -1409,6 +1414,7 @@ public class CheckIndex {
}
break;
case NUMERIC:
+ status.totalNumericFields++;
checkNumericDocValues(fi.name, reader, reader.getNumericDocValues(fi.name));
if (reader.getBinaryDocValues(fi.name) != null ||
reader.getSortedDocValues(fi.name) != null ||
@@ -1543,7 +1549,7 @@ public class CheckIndex {
}
final DocsEnum postingsDocs2;
- if (!postingsTermsEnum.seekExact(term, true)) {
+ if (!postingsTermsEnum.seekExact(term)) {
throw new RuntimeException("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
}
postingsPostings = postingsTermsEnum.docsAndPositions(null, postingsPostings);
@@ -1677,7 +1683,7 @@ public class CheckIndex {
*
* <p><b>WARNING</b>: Make sure you only call this when the
* index is not opened by any writer. */
- public void fixIndex(Status result, Codec codec) throws IOException {
+ public void fixIndex(Status result) throws IOException {
if (result.partial)
throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
result.newSegments.changed();
@@ -1732,7 +1738,6 @@ public class CheckIndex {
boolean doFix = false;
boolean doCrossCheckTermVectors = false;
- Codec codec = Codec.getDefault(); // only used when fixing
boolean verbose = false;
List<String> onlySegments = new ArrayList<String>();
String indexPath = null;
@@ -1744,13 +1749,6 @@ public class CheckIndex {
doFix = true;
} else if ("-crossCheckTermVectors".equals(arg)) {
doCrossCheckTermVectors = true;
- } else if ("-codec".equals(arg)) {
- if (i == args.length-1) {
- System.out.println("ERROR: missing name for -codec option");
- System.exit(1);
- }
- i++;
- codec = Codec.forName(args[i]);
} else if (arg.equals("-verbose")) {
verbose = true;
} else if (arg.equals("-segment")) {
@@ -1851,7 +1849,7 @@ public class CheckIndex {
System.out.println(" " + (5-s) + "...");
}
System.out.println("Writing...");
- checker.fixIndex(result, codec);
+ checker.fixIndex(result);
System.out.println("OK");
System.out.println("Wrote new segments file \"" + result.newSegments.getSegmentsFileName() + "\"");
}
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java Tue Aug 13 04:06:18 2013
@@ -52,9 +52,6 @@ import org.apache.lucene.store.NoSuchDir
*/
public abstract class DirectoryReader extends BaseCompositeReader<AtomicReader> {
- /** Default termInfosIndexDivisor. */
- public static final int DEFAULT_TERMS_INDEX_DIVISOR = 1;
-
/** The index directory. */
protected final Directory directory;
@@ -64,29 +61,7 @@ public abstract class DirectoryReader ex
* @throws IOException if there is a low-level IO error
*/
public static DirectoryReader open(final Directory directory) throws IOException {
- return StandardDirectoryReader.open(directory, null, DEFAULT_TERMS_INDEX_DIVISOR);
- }
-
- /** Expert: Returns a IndexReader reading the index in the given
- * Directory with the given termInfosIndexDivisor.
- * @param directory the index directory
- * @param termInfosIndexDivisor Subsamples which indexed
- * terms are loaded into RAM. This has the same effect as {@link
- * IndexWriterConfig#setTermIndexInterval} except that setting
- * must be done at indexing time while this setting can be
- * set per reader. When set to N, then one in every
- * N*termIndexInterval terms in the index is loaded into
- * memory. By setting this to a value > 1 you can reduce
- * memory usage, at the expense of higher latency when
- * loading a TermInfo. The default value is 1. Set this
- * to -1 to skip loading the terms index entirely.
- * <b>NOTE:</b> divisor settings > 1 do not apply to all PostingsFormat
- * implementations, including the default one in this release. It only makes
- * sense for terms indexes that can efficiently re-sample terms at load time.
- * @throws IOException if there is a low-level IO error
- */
- public static DirectoryReader open(final Directory directory, int termInfosIndexDivisor) throws IOException {
- return StandardDirectoryReader.open(directory, null, termInfosIndexDivisor);
+ return StandardDirectoryReader.open(directory, null);
}
/**
@@ -118,29 +93,7 @@ public abstract class DirectoryReader ex
* @throws IOException if there is a low-level IO error
*/
public static DirectoryReader open(final IndexCommit commit) throws IOException {
- return StandardDirectoryReader.open(commit.getDirectory(), commit, DEFAULT_TERMS_INDEX_DIVISOR);
- }
-
- /** Expert: returns an IndexReader reading the index in the given
- * {@link IndexCommit} and termInfosIndexDivisor.
- * @param commit the commit point to open
- * @param termInfosIndexDivisor Subsamples which indexed
- * terms are loaded into RAM. This has the same effect as {@link
- * IndexWriterConfig#setTermIndexInterval} except that setting
- * must be done at indexing time while this setting can be
- * set per reader. When set to N, then one in every
- * N*termIndexInterval terms in the index is loaded into
- * memory. By setting this to a value > 1 you can reduce
- * memory usage, at the expense of higher latency when
- * loading a TermInfo. The default value is 1. Set this
- * to -1 to skip loading the terms index entirely.
- * <b>NOTE:</b> divisor settings > 1 do not apply to all PostingsFormat
- * implementations, including the default one in this release. It only makes
- * sense for terms indexes that can efficiently re-sample terms at load time.
- * @throws IOException if there is a low-level IO error
- */
- public static DirectoryReader open(final IndexCommit commit, int termInfosIndexDivisor) throws IOException {
- return StandardDirectoryReader.open(commit.getDirectory(), commit, termInfosIndexDivisor);
+ return StandardDirectoryReader.open(commit.getDirectory(), commit);
}
/**
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java Tue Aug 13 04:06:18 2013
@@ -659,7 +659,7 @@ public class DocTermOrds {
}
@Override
- public SeekStatus seekCeil(BytesRef target, boolean useCache) throws IOException {
+ public SeekStatus seekCeil(BytesRef target) throws IOException {
// already here
if (term != null && term.equals(target)) {
@@ -729,7 +729,7 @@ public class DocTermOrds {
//System.out.println(" do seek term=" + base.utf8ToString());
ord = idx << indexIntervalBits;
delta = (int) (targetOrd - ord);
- final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(base, true);
+ final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(base);
assert seekStatus == TermsEnum.SeekStatus.FOUND;
} else {
//System.out.println("seek w/in block");
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java Tue Aug 13 04:06:18 2013
@@ -403,8 +403,8 @@ class DocumentsWriterPerThread {
++numDocsInRAM;
}
- // Buffer a specific docID for deletion. Currently only
- // used when we hit a exception when adding a document
+ // Buffer a specific docID for deletion. Currently only
+ // used when we hit an exception when adding a document
void deleteDocID(int docIDUpto) {
pendingDeletes.addDocID(docIDUpto);
// NOTE: we do not trigger flush here. This is
@@ -468,7 +468,6 @@ class DocumentsWriterPerThread {
assert deleteSlice == null : "all deletes must be applied in prepareFlush";
segmentInfo.setDocCount(numDocsInRAM);
flushState = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.finish(),
- writer.getConfig().getTermIndexInterval(),
pendingDeletes, new IOContext(new FlushInfo(numDocsInRAM, bytesUsed())));
final double startMBUsed = parent.flushControl.netBytes() / 1024. / 1024.;
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java Tue Aug 13 04:06:18 2013
@@ -147,7 +147,10 @@ abstract class DocumentsWriterPerThreadP
@Override
public DocumentsWriterPerThreadPool clone() {
// We should only be cloned before being used:
- assert numThreadStatesActive == 0;
+ if (numThreadStatesActive != 0) {
+ throw new IllegalStateException("clone this object before it is used!");
+ }
+
DocumentsWriterPerThreadPool clone;
try {
clone = (DocumentsWriterPerThreadPool) super.clone();
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java Tue Aug 13 04:06:18 2013
@@ -157,8 +157,8 @@ public class FilterAtomicReader extends
}
@Override
- public SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException {
- return in.seekCeil(text, useCache);
+ public SeekStatus seekCeil(BytesRef text) throws IOException {
+ return in.seekCeil(text);
}
@Override
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java Tue Aug 13 04:06:18 2013
@@ -154,7 +154,7 @@ public abstract class FilteredTermsEnum
* support seeking.
*/
@Override
- public boolean seekExact(BytesRef term, boolean useCache) throws IOException {
+ public boolean seekExact(BytesRef term) throws IOException {
throw new UnsupportedOperationException(getClass().getName()+" does not support seeking");
}
@@ -163,7 +163,7 @@ public abstract class FilteredTermsEnum
* support seeking.
*/
@Override
- public SeekStatus seekCeil(BytesRef term, boolean useCache) throws IOException {
+ public SeekStatus seekCeil(BytesRef term) throws IOException {
throw new UnsupportedOperationException(getClass().getName()+" does not support seeking");
}
@@ -222,7 +222,7 @@ public abstract class FilteredTermsEnum
//System.out.println(" seek to t=" + (t == null ? "null" : t.utf8ToString()) + " tenum=" + tenum);
// Make sure we always seek forward:
assert actualTerm == null || t == null || getComparator().compare(t, actualTerm) > 0: "curTerm=" + actualTerm + " seekTerm=" + t;
- if (t == null || tenum.seekCeil(t, false) == SeekStatus.END) {
+ if (t == null || tenum.seekCeil(t) == SeekStatus.END) {
// no more terms to seek to or enum exhausted
//System.out.println(" return null");
return null;
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java Tue Aug 13 04:06:18 2013
@@ -20,23 +20,32 @@ package org.apache.lucene.index;
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
/**
- * Default {@link FlushPolicy} implementation that flushes based on RAM used,
- * document count and number of buffered deletes depending on the IndexWriter's
- * {@link IndexWriterConfig}.
+ * Default {@link FlushPolicy} implementation that flushes new segments based on
+ * RAM used and document count depending on the IndexWriter's
+ * {@link IndexWriterConfig}. It also applies pending deletes based on the
+ * number of buffered delete terms.
*
* <ul>
- * <li>{@link #onDelete(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)} - flushes
- * based on the global number of buffered delete terms iff
- * {@link IndexWriterConfig#getMaxBufferedDeleteTerms()} is enabled</li>
- * <li>{@link #onInsert(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)} - flushes
- * either on the number of documents per {@link DocumentsWriterPerThread} (
+ * <li>
+ * {@link #onDelete(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)}
+ * - applies pending delete operations based on the global number of buffered
+ * delete terms iff {@link IndexWriterConfig#getMaxBufferedDeleteTerms()} is
+ * enabled</li>
+ * <li>
+ * {@link #onInsert(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)}
+ * - flushes either on the number of documents per
+ * {@link DocumentsWriterPerThread} (
* {@link DocumentsWriterPerThread#getNumDocsInRAM()}) or on the global active
* memory consumption in the current indexing session iff
* {@link IndexWriterConfig#getMaxBufferedDocs()} or
* {@link IndexWriterConfig#getRAMBufferSizeMB()} is enabled respectively</li>
- * <li>{@link #onUpdate(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)} - calls
- * {@link #onInsert(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)} and
- * {@link #onDelete(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)} in order</li>
+ * <li>
+ * {@link #onUpdate(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)}
+ * - calls
+ * {@link #onInsert(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)}
+ * and
+ * {@link #onDelete(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)}
+ * in order</li>
* </ul>
* All {@link IndexWriterConfig} settings are used to mark
* {@link DocumentsWriterPerThread} as flush pending during indexing with
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java Tue Aug 13 04:06:18 2013
@@ -32,18 +32,19 @@ import org.apache.lucene.util.SetOnce;
* {@link IndexWriterConfig#setRAMBufferSizeMB(double)}</li>
* <li>Number of RAM resident documents - configured via
* {@link IndexWriterConfig#setMaxBufferedDocs(int)}</li>
- * <li>Number of buffered delete terms/queries - configured via
- * {@link IndexWriterConfig#setMaxBufferedDeleteTerms(int)}</li>
* </ul>
- *
- * The {@link IndexWriter} consults a provided {@link FlushPolicy} to control the
- * flushing process. The policy is informed for each added or
- * updated document as well as for each delete term. Based on the
- * {@link FlushPolicy}, the information provided via {@link ThreadState} and
+ * The policy also applies pending delete operations (by term and/or query),
+ * given the threshold set in
+ * {@link IndexWriterConfig#setMaxBufferedDeleteTerms(int)}.
+ * <p>
+ * {@link IndexWriter} consults the provided {@link FlushPolicy} to control the
+ * flushing process. The policy is informed for each added or updated document
+ * as well as for each delete term. Based on the {@link FlushPolicy}, the
+ * information provided via {@link ThreadState} and
* {@link DocumentsWriterFlushControl}, the {@link FlushPolicy} decides if a
- * {@link DocumentsWriterPerThread} needs flushing and mark it as
- * flush-pending via
- * {@link DocumentsWriterFlushControl#setFlushPending(DocumentsWriterPerThreadPool.ThreadState)}.
+ * {@link DocumentsWriterPerThread} needs flushing and marks it as flush-pending
+ * via {@link DocumentsWriterFlushControl#setFlushPending}, or if deletes need
+ * to be applied.
*
* @see ThreadState
* @see DocumentsWriterFlushControl
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Tue Aug 13 04:06:18 2013
@@ -630,15 +630,13 @@ public class IndexWriter implements Clos
/**
* Constructs a new IndexWriter per the settings given in <code>conf</code>.
- * Note that the passed in {@link IndexWriterConfig} is
- * privately cloned, which, in-turn, clones the
- * {@link IndexWriterConfig#getFlushPolicy() flush policy},
- * {@link IndexWriterConfig#getIndexDeletionPolicy() deletion policy},
- * {@link IndexWriterConfig#getMergePolicy() merge policy},
- * and {@link IndexWriterConfig#getMergeScheduler() merge scheduler}.
- * If you need to make subsequent "live"
- * changes to the configuration use {@link #getConfig}.
+ * If you want to make "live" changes to this writer instance, use
+ * {@link #getConfig()}.
+ *
* <p>
+ * <b>NOTE:</b> after this writer is created, the given configuration instance
+ * cannot be passed to another writer. If you intend to do so, you should
+ * {@link IndexWriterConfig#clone() clone} it beforehand.
*
* @param d
* the index directory. The index is either created or appended
@@ -653,7 +651,8 @@ public class IndexWriter implements Clos
* IO error
*/
public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException {
- config = new LiveIndexWriterConfig(conf.clone());
+ conf.setIndexWriter(this); // prevent reuse by other instances
+ config = new LiveIndexWriterConfig(conf);
directory = d;
analyzer = config.getAnalyzer();
infoStream = config.getInfoStream();
@@ -2429,15 +2428,16 @@ public class IndexWriter implements Clos
* close the writer. See <a href="#OOME">above</a> for details.
*
* <p>
+ * <b>NOTE:</b> empty segments are dropped by this method and not added to this
+ * index.
+ *
+ * <p>
* <b>NOTE:</b> this method merges all given {@link IndexReader}s in one
* merge. If you intend to merge a large number of readers, it may be better
* to call this method multiple times, each time with a small set of readers.
* In principle, if you use a merge policy with a {@code mergeFactor} or
* {@code maxMergeAtOnce} parameter, you should pass that many readers in one
- * call. Also, if the given readers are {@link DirectoryReader}s, they can be
- * opened with {@code termIndexInterval=-1} to save RAM, since during merge
- * the in-memory structure is not used. See
- * {@link DirectoryReader#open(Directory, int)}.
+ * call.
*
* <p>
* <b>NOTE</b>: if you call {@link #close(boolean)} with <tt>false</tt>, which
@@ -2462,11 +2462,20 @@ public class IndexWriter implements Clos
String mergedName = newSegmentName();
final List<AtomicReader> mergeReaders = new ArrayList<AtomicReader>();
for (IndexReader indexReader : readers) {
- numDocs += indexReader.numDocs();
- for (AtomicReaderContext ctx : indexReader.leaves()) {
- mergeReaders.add(ctx.reader());
+ if (indexReader.numDocs() > 0) {
+ numDocs += indexReader.numDocs();
+ for (AtomicReaderContext ctx : indexReader.leaves()) {
+ if (ctx.reader().numDocs() > 0) { // drop empty (or all deleted) segments
+ mergeReaders.add(ctx.reader());
+ }
+ }
}
}
+
+ if (mergeReaders.isEmpty()) { // no segments with documents to add
+ return;
+ }
+
final IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, -1));
// TODO: somehow we should fix this merge so it's
@@ -2476,7 +2485,7 @@ public class IndexWriter implements Clos
SegmentInfo info = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergedName, -1,
false, codec, null, null);
- SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir, config.getTermIndexInterval(),
+ SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir,
MergeState.CheckAbort.NONE, globalFieldNumberMap, context);
MergeState mergeState;
@@ -3658,7 +3667,7 @@ public class IndexWriter implements Clos
// Hold onto the "live" reader; we will use this to
// commit merged deletes
final ReadersAndLiveDocs rld = readerPool.get(info, true);
- SegmentReader reader = rld.getMergeReader(context);
+ SegmentReader reader = rld.getReader(context);
assert reader != null;
// Carefully pull the most recent live docs:
@@ -3715,7 +3724,7 @@ public class IndexWriter implements Clos
// we pass merge.getMergeReaders() instead of merge.readers to allow the
// OneMerge to return a view over the actual segments to merge
final SegmentMerger merger = new SegmentMerger(merge.getMergeReaders(),
- merge.info.info, infoStream, dirWrapper, config.getTermIndexInterval(),
+ merge.info.info, infoStream, dirWrapper,
checkAbort, globalFieldNumberMap, context);
merge.checkAborted(directory);
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java Tue Aug 13 04:06:18 2013
@@ -26,6 +26,8 @@ import org.apache.lucene.index.IndexWrit
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.PrintStreamInfoStream;
+import org.apache.lucene.util.SetOnce;
+import org.apache.lucene.util.SetOnce.AlreadySetException;
import org.apache.lucene.util.Version;
/**
@@ -70,9 +72,6 @@ public final class IndexWriterConfig ext
CREATE_OR_APPEND
}
- /** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */
- public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here
-
/** Denotes a flush trigger is disabled. */
public final static int DISABLE_AUTO_FLUSH = -1;
@@ -98,9 +97,6 @@ public final class IndexWriterConfig ext
/** Default setting for {@link #setReaderPooling}. */
public final static boolean DEFAULT_READER_POOLING = false;
- /** Default value is 1. Change using {@link #setReaderTermsIndexDivisor(int)}. */
- public static final int DEFAULT_READER_TERMS_INDEX_DIVISOR = DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR;
-
/** Default value is 1945. Change using {@link #setRAMPerThreadHardLimitMB(int)} */
public static final int DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB = 1945;
@@ -132,6 +128,21 @@ public final class IndexWriterConfig ext
return WRITE_LOCK_TIMEOUT;
}
+ // indicates whether this config instance is already attached to a writer.
+ // not final so that it can be cloned properly.
+ private SetOnce<IndexWriter> writer = new SetOnce<IndexWriter>();
+
+ /**
+ * Sets the {@link IndexWriter} this config is attached to.
+ *
+ * @throws AlreadySetException
+ * if this config is already attached to a writer.
+ */
+ IndexWriterConfig setIndexWriter(IndexWriter writer) {
+ this.writer.set(writer);
+ return this;
+ }
+
/**
* Creates a new config with defaults that match the specified
* {@link Version} as well as the default {@link
@@ -152,6 +163,8 @@ public final class IndexWriterConfig ext
try {
IndexWriterConfig clone = (IndexWriterConfig) super.clone();
+ clone.writer = writer.clone();
+
// Mostly shallow clone, but do a deepish clone of
// certain objects that have state that cannot be shared
// across IW instances:
@@ -484,16 +497,6 @@ public final class IndexWriterConfig ext
return super.getRAMBufferSizeMB();
}
- @Override
- public int getReaderTermsIndexDivisor() {
- return super.getReaderTermsIndexDivisor();
- }
-
- @Override
- public int getTermIndexInterval() {
- return super.getTermIndexInterval();
- }
-
/** If non-null, information about merges, deletes and a
* message when maxFieldLength is reached will be printed
* to this.
@@ -536,17 +539,15 @@ public final class IndexWriterConfig ext
}
@Override
- public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
- return (IndexWriterConfig) super.setReaderTermsIndexDivisor(divisor);
- }
-
- @Override
- public IndexWriterConfig setTermIndexInterval(int interval) {
- return (IndexWriterConfig) super.setTermIndexInterval(interval);
- }
-
public IndexWriterConfig setUseCompoundFile(boolean useCompoundFile) {
return (IndexWriterConfig) super.setUseCompoundFile(useCompoundFile);
}
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(super.toString());
+ sb.append("writer=").append(writer).append("\n");
+ return sb.toString();
+ }
+
}