You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/01/08 21:20:55 UTC
svn commit: r1228941 [1/2] - in /lucene/dev/branches/lucene3305: ./ lucene/
lucene/contrib/ lucene/contrib/misc/src/java/org/apache/lucene/store/
lucene/src/java/org/apache/lucene/codecs/lucene40/values/
lucene/src/java/org/apache/lucene/index/ lucene/...
Author: uschindler
Date: Sun Jan 8 20:20:52 2012
New Revision: 1228941
URL: http://svn.apache.org/viewvc?rev=1228941&view=rev
Log:
LUCENE-3305: Merge up to trunk rev 1228938. Rebuild TokenDict FSTs
Modified:
lucene/dev/branches/lucene3305/ (props changed)
lucene/dev/branches/lucene3305/lucene/ (props changed)
lucene/dev/branches/lucene3305/lucene/CHANGES.txt
lucene/dev/branches/lucene3305/lucene/contrib/CHANGES.txt
lucene/dev/branches/lucene3305/lucene/contrib/misc/src/java/org/apache/lucene/store/DirectIOLinuxDirectory.java
lucene/dev/branches/lucene3305/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.java
lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/index/MultiDocValues.java
lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java
lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/BufferedIndexInput.java
lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java
lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java
lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/DataInput.java
lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/Directory.java
lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/MMapDirectory.java
lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/NIOFSDirectory.java
lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/RAMInputStream.java
lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/SimpleFSDirectory.java
lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/util/fst/Builder.java
lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/util/fst/FST.java
lucene/dev/branches/lucene3305/lucene/src/site/build/site/contributions.html
lucene/dev/branches/lucene3305/lucene/src/site/build/site/demo.html
lucene/dev/branches/lucene3305/lucene/src/site/build/site/demo2.html
lucene/dev/branches/lucene3305/lucene/src/site/build/site/fileformats.html
lucene/dev/branches/lucene3305/lucene/src/site/build/site/gettingstarted.html
lucene/dev/branches/lucene3305/lucene/src/site/build/site/index.html
lucene/dev/branches/lucene3305/lucene/src/site/build/site/linkmap.html
lucene/dev/branches/lucene3305/lucene/src/site/build/site/lucene-contrib/index.html
lucene/dev/branches/lucene3305/lucene/src/site/build/site/queryparsersyntax.html
lucene/dev/branches/lucene3305/lucene/src/site/build/site/scoring.html
lucene/dev/branches/lucene3305/lucene/src/site/build/site/systemrequirements.html
lucene/dev/branches/lucene3305/lucene/src/site/src/documentation/content/xdocs/site.xml
lucene/dev/branches/lucene3305/lucene/src/site/src/documentation/skins/lucene/xslt/html/site-to-xhtml.xsl
lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/index/TestDocValuesIndexing.java
lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/index/TestTypePromotion.java
lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/search/TestSort.java
lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/util/TestByteBlockPool.java
lucene/dev/branches/lucene3305/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
lucene/dev/branches/lucene3305/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary$fst.dat
lucene/dev/branches/lucene3305/solr/ (props changed)
lucene/dev/branches/lucene3305/solr/core/ (props changed)
lucene/dev/branches/lucene3305/solr/core/src/test/ (props changed)
lucene/dev/branches/lucene3305/solr/core/src/test/org/apache/solr/analysis/TestSynonymFilterFactory.java
lucene/dev/branches/lucene3305/solr/site/ (props changed)
lucene/dev/branches/lucene3305/solr/site-src/ (props changed)
lucene/dev/branches/lucene3305/solr/site-src/src/documentation/content/xdocs/site.xml
lucene/dev/branches/lucene3305/solr/site-src/src/documentation/skins/lucene/xslt/html/site-to-xhtml.xsl
lucene/dev/branches/lucene3305/solr/site/features.html
lucene/dev/branches/lucene3305/solr/site/features.pdf
lucene/dev/branches/lucene3305/solr/site/index.html
lucene/dev/branches/lucene3305/solr/site/index.pdf
lucene/dev/branches/lucene3305/solr/site/issue_tracking.html
lucene/dev/branches/lucene3305/solr/site/issue_tracking.pdf
lucene/dev/branches/lucene3305/solr/site/linkmap.html
lucene/dev/branches/lucene3305/solr/site/linkmap.pdf
lucene/dev/branches/lucene3305/solr/site/mailing_lists.html
lucene/dev/branches/lucene3305/solr/site/mailing_lists.pdf
lucene/dev/branches/lucene3305/solr/site/skin/basic.css
lucene/dev/branches/lucene3305/solr/site/skin/print.css
lucene/dev/branches/lucene3305/solr/site/skin/profile.css
lucene/dev/branches/lucene3305/solr/site/skin/screen.css
lucene/dev/branches/lucene3305/solr/site/tutorial.html
lucene/dev/branches/lucene3305/solr/site/tutorial.pdf
lucene/dev/branches/lucene3305/solr/site/version_control.html
lucene/dev/branches/lucene3305/solr/site/version_control.pdf
Modified: lucene/dev/branches/lucene3305/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/CHANGES.txt?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene3305/lucene/CHANGES.txt Sun Jan 8 20:20:52 2012
@@ -714,6 +714,11 @@ Changes in backwards compatibility polic
contrib/queryparser. If you have used those classes in your code
just add the lucene-queryparser.jar file to your classpath.
(Uwe Schindler)
+
+* LUCENE-3681: FST now stores labels for BYTE2 input type as 2 bytes
+ instead of vInt; this can make FSTs smaller and faster, but it is a
+ break in the binary format so if you had built and saved any FSTs
+ then you need to rebuild them. (Robert Muir, Mike McCandless)
Security fixes
Modified: lucene/dev/branches/lucene3305/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/contrib/CHANGES.txt?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/lucene3305/lucene/contrib/CHANGES.txt Sun Jan 8 20:20:52 2012
@@ -136,6 +136,14 @@ Bug Fixes
* LUCENE-3609: Fix regression in BooleanFilter, introduced in Lucene 3.5,
to correctly handle minShouldMatch behaviour of previous versions.
(Shay Banon, Uwe Schindler)
+
+ * LUCENE-3668: For a multi-token synonym mapping to a single token,
+ SynonymFilter will now set the start offset of the synonym token to
+ the start offset of the first matched token, and the end offset of
+ the synonym token to the end offset of the last matched token.
+ This way if the synonym token is used for highlighting, it will
+ cover all tokens it had matched. (Koji Sekiguchi, Robert Muir,
+ Mike McCandless)
Documentation
Modified: lucene/dev/branches/lucene3305/lucene/contrib/misc/src/java/org/apache/lucene/store/DirectIOLinuxDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/contrib/misc/src/java/org/apache/lucene/store/DirectIOLinuxDirectory.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/contrib/misc/src/java/org/apache/lucene/store/DirectIOLinuxDirectory.java (original)
+++ lucene/dev/branches/lucene3305/lucene/contrib/misc/src/java/org/apache/lucene/store/DirectIOLinuxDirectory.java Sun Jan 8 20:20:52 2012
@@ -17,6 +17,7 @@ package org.apache.lucene.store;
* the License.
*/
+import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.FileInputStream;
@@ -340,7 +341,7 @@ public class DirectIOLinuxDirectory exte
throw new IOException(ioe.getMessage() + ": " + this, ioe);
}
if (n < 0) {
- throw new IOException("eof: " + this);
+ throw new EOFException("read past EOF: " + this);
}
buffer.rewind();
}
Modified: lucene/dev/branches/lucene3305/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.java (original)
+++ lucene/dev/branches/lucene3305/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.java Sun Jan 8 20:20:52 2012
@@ -97,7 +97,7 @@ public class WindowsDirectory extends FS
}
if (bytesRead != length) {
- throw new EOFException("Read past EOF (resource: " + this + ")");
+ throw new EOFException("read past EOF: " + this);
}
}
Modified: lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java Sun Jan 8 20:20:52 2012
@@ -28,6 +28,7 @@ import org.apache.lucene.index.DocValues
import org.apache.lucene.index.SortedBytesMergeUtils;
import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.SortedBytesMergeUtils.IndexOutputBytesRefConsumer;
import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
import org.apache.lucene.index.MergeState;
@@ -66,11 +67,11 @@ class FixedSortedBytesImpl {
throws IOException {
boolean success = false;
try {
- final MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_FIXED_SORTED, docValues, comp, mergeState);
- List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState, docValues, ctx);
+ final MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_FIXED_SORTED, docValues, comp, mergeState.mergedDocCount);
+ List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState.docBase, mergeState.docMaps, docValues, ctx);
final IndexOutput datOut = getOrCreateDataOut();
datOut.writeInt(ctx.sizePerValues);
- final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, datOut, slices);
+ final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, new IndexOutputBytesRefConsumer(datOut), slices);
final IndexOutput idxOut = getOrCreateIndexOut();
idxOut.writeInt(maxOrd);
Modified: lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java Sun Jan 8 20:20:52 2012
@@ -28,6 +28,7 @@ import org.apache.lucene.index.DocValues
import org.apache.lucene.index.SortedBytesMergeUtils;
import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.SortedBytesMergeUtils.IndexOutputBytesRefConsumer;
import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
import org.apache.lucene.index.MergeState;
@@ -67,12 +68,12 @@ final class VarSortedBytesImpl {
throws IOException {
boolean success = false;
try {
- MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_VAR_SORTED, docValues, comp, mergeState);
- final List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState, docValues, ctx);
+ MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_VAR_SORTED, docValues, comp, mergeState.mergedDocCount);
+ final List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState.docBase, mergeState.docMaps, docValues, ctx);
IndexOutput datOut = getOrCreateDataOut();
ctx.offsets = new long[1];
- final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, datOut, slices);
+ final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, new IndexOutputBytesRefConsumer(datOut), slices);
final long[] offsets = ctx.offsets;
maxBytes = offsets[maxOrd-1];
final IndexOutput idxOut = getOrCreateIndexOut();
Modified: lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/index/MultiDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/index/MultiDocValues.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/index/MultiDocValues.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/index/MultiDocValues.java Sun Jan 8 20:20:52 2012
@@ -20,11 +20,17 @@ import java.io.IOException;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Comparator;
import java.util.List;
+import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
+import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
+import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.ReaderUtil.Gather;
+import org.apache.lucene.util.packed.PackedInts.Reader;
/**
* A wrapper for compound IndexReader providing access to per segment
@@ -143,6 +149,8 @@ public class MultiDocValues extends DocV
switch(promoted) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_STRAIGHT:
+ case BYTES_FIXED_SORTED:
+ assert promotedType[0].getValueSize() >= 0;
slice.docValues = new EmptyFixedDocValues(slice.length, promoted, promotedType[0].getValueSize());
break;
default:
@@ -179,7 +187,6 @@ public class MultiDocValues extends DocV
return emptySource.type();
}
-
@Override
public Source getDirectSource() throws IOException {
return emptySource;
@@ -276,6 +283,59 @@ public class MultiDocValues extends DocV
}
+ /**
+ * Builds one sorted view over all slices when this source's type is
+ * BYTES_FIXED_SORTED or BYTES_VAR_SORTED: the per-slice values are merged
+ * through SortedBytesMergeUtils into a RecordingBytesRefConsumer, and every
+ * document is then mapped to the ordinal of its value in the merged result.
+ * Any other type is handed to the superclass implementation.
+ *
+ * @return a MultiSortedSource for the sorted byte types, otherwise whatever
+ * super.asSortedSource() returns
+ * @throws RuntimeException wrapping any IOException raised while merging
+ */
@Override
+ public SortedSource asSortedSource() {
+ try {
+ if (type == Type.BYTES_FIXED_SORTED || type == Type.BYTES_VAR_SORTED) {
+ DocValues[] values = new DocValues[slices.length];
+ Comparator<BytesRef> comp = null;
+ // Collect the per-slice DocValues and take the comparator from the
+ // slices that are not EmptyDocValues.
+ for (int i = 0; i < values.length; i++) {
+ values[i] = slices[i].docValues;
+ if (!(values[i] instanceof EmptyDocValues)) {
+ Comparator<BytesRef> comparator = values[i].getDirectSource()
+ .asSortedSource().getComparator();
+ // every non-empty slice is expected to hand back the same comparator instance
+ assert comp == null || comp == comparator;
+ comp = comparator;
+ }
+ }
+ // NOTE(review): if every slice is an EmptyDocValues, comp is still null
+ // here and only this assert guards against it - confirm callers never
+ // reach this with all-empty slices.
+ assert comp != null;
+ final int globalNumDocs = globalNumDocs();
+ final MergeContext ctx = SortedBytesMergeUtils.init(type, values,
+ comp, globalNumDocs);
+ // From here on the local list 'slices' shadows the field of the same name.
+ // The doc-map argument is an array whose rows are all null.
+ // NOTE(review): presumably a null row means "no deletes" to buildSlices - confirm.
+ List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(
+ docBases(), new int[values.length][], values, ctx);
+ RecordingBytesRefConsumer consumer = new RecordingBytesRefConsumer(
+ type);
+ final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, consumer,
+ slices);
+ // Each slice fills in its own range of the shared doc -> ord array.
+ final int[] docToOrd = new int[globalNumDocs];
+ for (SortedSourceSlice slice : slices) {
+ slice.toAbsolutOrds(docToOrd);
+ }
+ return new MultiSortedSource(type, comp, consumer.pagedBytes,
+ ctx.sizePerValues, maxOrd, docToOrd, consumer.ordToOffset);
+ }
+ } catch (IOException e) {
+ throw new RuntimeException("load failed", e);
+ }
+ return super.asSortedSource();
+ }
+
+ /**
+  * Adds up the {@code length} of every slice.
+  *
+  * @return the combined length of all slices
+  */
+ private int globalNumDocs() {
+   int total = 0;
+   for (final DocValuesSlice slice : slices) {
+     total += slice.length;
+   }
+   return total;
+ }
+
+ /**
+  * Copies the {@code start} of every slice into a new array, in slice order.
+  *
+  * @return a fresh array with one entry per slice
+  */
+ private int[] docBases() {
+   final int count = slices.length;
+   final int[] bases = new int[count];
+   for (int idx = 0; idx < count; idx++) {
+     bases[idx] = slices[idx].start;
+   }
+   return bases;
+ }
+
public boolean hasArray() {
boolean oneRealSource = false;
for (DocValuesSlice slice : slices) {
@@ -346,12 +406,79 @@ public class MultiDocValues extends DocV
}
}
}
+
+ /**
+  * {@link SortedBytesMergeUtils.BytesRefConsumer} that keeps every consumed
+  * value in memory by appending it to a {@link PagedBytes} instance.
+  * <p>
+  * For {@code BYTES_VAR_SORTED} the offset handed to {@link #consume} is also
+  * stored at index {@code ord + 1} of {@link #ordToOffset}; index 0 is never
+  * written and therefore stays 0. For any other type {@link #ordToOffset}
+  * is {@code null} and no offsets are recorded.
+  */
+ private static final class RecordingBytesRefConsumer implements SortedBytesMergeUtils.BytesRefConsumer {
+   private static final int PAGED_BYTES_BITS = 15;
+   final PagedBytes pagedBytes = new PagedBytes(PAGED_BYTES_BITS);
+   long[] ordToOffset;
+
+   /**
+    * @param type the doc values type being recorded; offsets are tracked
+    *        only for {@code BYTES_VAR_SORTED}
+    */
+   public RecordingBytesRefConsumer(Type type) {
+     if (type == Type.BYTES_VAR_SORTED) {
+       ordToOffset = new long[2];
+     } else {
+       ordToOffset = null;
+     }
+   }
+
+   /**
+    * Appends {@code ref} to the paged bytes and, when offsets are tracked,
+    * stores {@code offset} in the slot after {@code ord}, growing the array
+    * on demand.
+    */
+   @Override
+   public void consume(BytesRef ref, int ord, long offset) throws IOException {
+     pagedBytes.copy(ref);
+     if (ordToOffset == null) {
+       return;
+     }
+     final int slot = ord + 1;
+     if (slot >= ordToOffset.length) {
+       ordToOffset = ArrayUtil.grow(ordToOffset, slot + 1);
+     }
+     ordToOffset[slot] = offset;
+   }
+
+ }
+
+ /**
+  * {@link SortedSource} over values that were merged from several slices.
+  * The merged values are read from a frozen {@link PagedBytes} instance and
+  * {@code docToOrd} maps a document ID to the ordinal of its value.
+  * <p>
+  * When {@code ordToOffset} is non-null, the value for ordinal {@code ord}
+  * spans {@code [ordToOffset[ord], ordToOffset[ord + 1])}. Otherwise every
+  * value is {@code size} bytes long and starts at {@code ord * size}.
+  */
+ private static final class MultiSortedSource extends SortedSource {
+   private final PagedBytes.Reader data;
+   private final int[] docToOrd;
+   private final long[] ordToOffset;
+   private final int size;
+   private final int valueCount;
+
+   /**
+    * @param type        doc values type reported by this source
+    * @param comparator  comparator the merged values are ordered by
+    * @param pagedBytes  merged values; frozen by this constructor
+    * @param size        byte length of each value, used only when
+    *                    {@code ordToOffset} is {@code null}
+    * @param numValues   value reported by {@link #getValueCount()}
+    * @param docToOrd    ordinal per document ID
+    * @param ordToOffset per-ordinal offsets into the paged bytes, or
+    *                    {@code null} for fixed-size values
+    */
+   public MultiSortedSource(Type type, Comparator<BytesRef> comparator, PagedBytes pagedBytes, int size, int numValues, int[] docToOrd, long[] ordToOffset) {
+     super(type, comparator);
+     data = pagedBytes.freeze(true);
+     this.size = size;
+     this.valueCount = numValues;
+     this.docToOrd = docToOrd;
+     this.ordToOffset = ordToOffset;
+   }
+
+   @Override
+   public int ord(int docID) {
+     return docToOrd[docID];
+   }
+
+   /**
+    * Fills {@code bytesRef} with the value stored for {@code ord}.
+    *
+    * @return the slice produced by the underlying paged bytes reader
+    */
+   @Override
+   public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+     final long offset;
+     final int length;
+     if (ordToOffset != null) {
+       offset = ordToOffset[ord];
+       length = (int) (ordToOffset[1 + ord] - offset);
+     } else {
+       // Widen before multiplying: an int product of ord and size wraps
+       // around once the fixed-size data grows past Integer.MAX_VALUE bytes.
+       offset = (long) ord * size;
+       length = size;
+     }
+     assert length >= 0;
+     return data.fillSlice(bytesRef, offset, length);
+   }
+
+   /** Always {@code null}: this source exposes no packed doc-to-ord reader. */
+   @Override
+   public Reader getDocToOrd() {
+     return null;
+   }
+
+   @Override
+   public int getValueCount() {
+     return valueCount;
+   }
+ }
// TODO: this is dup of DocValues.getDefaultSource()?
- private static class EmptySource extends Source {
+ private static class EmptySource extends SortedSource {
public EmptySource(Type type) {
- super(type);
+ super(type, BytesRef.getUTF8SortedAsUnicodeComparator());
}
@Override
@@ -369,14 +496,46 @@ public class MultiDocValues extends DocV
public long getInt(int docID) {
return 0;
}
+
+ /**
+  * Returns the sorted view provided by the superclass, for every type.
+  * The earlier check for the sorted byte types had an empty body and did
+  * nothing, so it has been removed.
+  */
+ @Override
+ public SortedSource asSortedSource() {
+   return super.asSortedSource();
+ }
+
+ /** Every document maps to ordinal 0 in an empty source. */
+ @Override
+ public int ord(int docID) {
+ return 0;
+ }
+
+ /**
+  * Turns {@code bytesRef} into an empty slice by resetting its offset and
+  * length to 0, whatever {@code ord} is. The {@code bytes} array of
+  * {@code bytesRef} is left untouched.
+  *
+  * @return the same {@code bytesRef} instance
+  */
+ @Override
+ public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+   bytesRef.offset = 0;
+   bytesRef.length = 0;
+   return bytesRef;
+ }
+
+ /** Always null: an empty source has no doc-to-ord reader to expose. */
+ @Override
+ public Reader getDocToOrd() {
+ return null;
+ }
+
+ /** Always 1, which matches ord(int) returning 0 for every document. */
+ @Override
+ public int getValueCount() {
+ return 1;
+ }
+
}
private static class EmptyFixedSource extends EmptySource {
private final int valueSize;
-
+ private final byte[] valueArray;
public EmptyFixedSource(Type type, int valueSize) {
super(type);
this.valueSize = valueSize;
+ valueArray = new byte[valueSize];
}
@Override
@@ -396,6 +555,14 @@ public class MultiDocValues extends DocV
public long getInt(int docID) {
return 0;
}
+
+ /**
+  * Points {@code bytesRef} at this source's {@code valueArray} with offset 0
+  * and length {@code valueSize}, whatever {@code ord} is.
+  * <p>
+  * NOTE(review): the same array instance is handed to every caller, so a
+  * caller that writes into the returned bytes would affect later reads.
+  *
+  * @return the same {@code bytesRef} instance
+  */
+ @Override
+ public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+   bytesRef.offset = 0;
+   bytesRef.length = valueSize;
+   bytesRef.bytes = valueArray;
+   return bytesRef;
+ }
}
@Override
@@ -412,4 +579,6 @@ public class MultiDocValues extends DocV
public Source getDirectSource() throws IOException {
return new MultiSource(slices, starts, true, type);
}
+
+
}
Modified: lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java Sun Jan 8 20:20:52 2012
@@ -25,7 +25,6 @@ import java.util.List;
import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
-import org.apache.lucene.index.MergeState.IndexReaderAndLiveDocs;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@@ -35,11 +34,6 @@ import org.apache.lucene.util.packed.Pac
/**
* @lucene.internal
*/
-// TODO: generalize this a bit more:
-// * remove writing (like indexoutput) from here
-// * just take IndexReaders (not IR&LiveDocs), doesnt care about liveDocs
-// * hook into MultiDocValues to make a MultiSortedSource
-// * maybe DV merging should then just use MultiDocValues for simplicity?
public final class SortedBytesMergeUtils {
private SortedBytesMergeUtils() {
@@ -47,7 +41,7 @@ public final class SortedBytesMergeUtils
}
public static MergeContext init(Type type, DocValues[] docValues,
- Comparator<BytesRef> comp, MergeState mergeState) {
+ Comparator<BytesRef> comp, int mergeDocCount) {
int size = -1;
if (type == Type.BYTES_FIXED_SORTED) {
for (DocValues indexDocValues : docValues) {
@@ -58,7 +52,7 @@ public final class SortedBytesMergeUtils
}
assert size >= 0;
}
- return new MergeContext(comp, mergeState, size, type);
+ return new MergeContext(comp, mergeDocCount, size, type);
}
public static final class MergeContext {
@@ -69,7 +63,7 @@ public final class SortedBytesMergeUtils
public final int[] docToEntry;
public long[] offsets; // if non-null #mergeRecords collects byte offsets here
- public MergeContext(Comparator<BytesRef> comp, MergeState mergeState,
+ public MergeContext(Comparator<BytesRef> comp, int mergeDocCount,
int size, Type type) {
assert type == Type.BYTES_FIXED_SORTED || type == Type.BYTES_VAR_SORTED;
this.comp = comp;
@@ -79,11 +73,15 @@ public final class SortedBytesMergeUtils
missingValue.grow(size);
missingValue.length = size;
}
- docToEntry = new int[mergeState.mergedDocCount];
+ docToEntry = new int[mergeDocCount];
+ }
+
+ /** Returns the length of docToEntry, i.e. the doc count this context was created with. */
+ public int getMergeDocCount() {
+ return docToEntry.length;
}
}
- public static List<SortedSourceSlice> buildSlices(MergeState mergeState,
+ public static List<SortedSourceSlice> buildSlices(int[] docBases ,int[][] docMaps,
DocValues[] docValues, MergeContext ctx) throws IOException {
final List<SortedSourceSlice> slices = new ArrayList<SortedSourceSlice>();
for (int i = 0; i < docValues.length; i++) {
@@ -92,13 +90,13 @@ public final class SortedBytesMergeUtils
if (docValues[i] != null
&& (directSource = docValues[i].getDirectSource()) != null) {
final SortedSourceSlice slice = new SortedSourceSlice(i, directSource
- .asSortedSource(), mergeState, ctx.docToEntry);
+ .asSortedSource(), docBases, ctx.getMergeDocCount(), ctx.docToEntry);
nextSlice = slice;
} else {
nextSlice = new SortedSourceSlice(i, new MissingValueSource(ctx),
- mergeState, ctx.docToEntry);
+ docBases, ctx.getMergeDocCount(), ctx.docToEntry);
}
- createOrdMapping(mergeState, nextSlice);
+ createOrdMapping(docBases, docMaps, nextSlice);
slices.add(nextSlice);
}
return Collections.unmodifiableList(slices);
@@ -113,12 +111,12 @@ public final class SortedBytesMergeUtils
* mapping in docIDToRelativeOrd. After the merge SortedSourceSlice#ordMapping
* contains the new global ordinals for the relative index.
*/
- private static void createOrdMapping(MergeState mergeState,
+ private static void createOrdMapping(int[] docBases ,int[][] docMaps,
SortedSourceSlice currentSlice) {
final int readerIdx = currentSlice.readerIdx;
- final int[] currentDocMap = mergeState.docMaps[readerIdx];
+ final int[] currentDocMap = docMaps[readerIdx];
final int docBase = currentSlice.docToOrdStart;
- assert docBase == mergeState.docBase[readerIdx];
+ assert docBase == docBases[readerIdx];
if (currentDocMap != null) { // we have deletes
for (int i = 0; i < currentDocMap.length; i++) {
final int doc = currentDocMap[i];
@@ -131,11 +129,7 @@ public final class SortedBytesMergeUtils
}
}
} else { // no deletes
- final IndexReaderAndLiveDocs indexReaderAndLiveDocs = mergeState.readers
- .get(readerIdx);
- final int numDocs = indexReaderAndLiveDocs.reader.numDocs();
- assert indexReaderAndLiveDocs.liveDocs == null;
- assert currentSlice.docToOrdEnd - currentSlice.docToOrdStart == numDocs;
+ final int numDocs = currentSlice.docToOrdEnd - currentSlice.docToOrdStart;
for (int doc = 0; doc < numDocs; doc++) {
final int ord = currentSlice.source.ord(doc);
currentSlice.docIDToRelativeOrd[docBase + doc] = ord;
@@ -145,7 +139,7 @@ public final class SortedBytesMergeUtils
}
}
- public static int mergeRecords(MergeContext ctx, IndexOutput datOut,
+ public static int mergeRecords(MergeContext ctx, BytesRefConsumer consumer,
List<SortedSourceSlice> slices) throws IOException {
final RecordMerger merger = new RecordMerger(new MergeQueue(slices.size(),
ctx.comp), slices.toArray(new SortedSourceSlice[0]));
@@ -159,22 +153,38 @@ public final class SortedBytesMergeUtils
currentMergedBytes = merger.current;
assert ctx.sizePerValues == -1 || ctx.sizePerValues == currentMergedBytes.length : "size: "
+ ctx.sizePerValues + " spare: " + currentMergedBytes.length;
-
+ offset += currentMergedBytes.length;
if (recordOffsets) {
- offset += currentMergedBytes.length;
if (merger.currentOrd >= offsets.length) {
offsets = ArrayUtil.grow(offsets, merger.currentOrd + 1);
}
offsets[merger.currentOrd] = offset;
}
- datOut.writeBytes(currentMergedBytes.bytes, currentMergedBytes.offset,
- currentMergedBytes.length);
+ consumer.consume(currentMergedBytes, merger.currentOrd, offset);
merger.pushTop();
}
ctx.offsets = offsets;
assert offsets == null || offsets[merger.currentOrd - 1] == offset;
return merger.currentOrd;
}
+
+ /**
+  * Callback that receives each value produced by
+  * {@link SortedBytesMergeUtils#mergeRecords}.
+  */
+ public interface BytesRefConsumer {
+   /**
+    * Called once per merged value.
+    *
+    * @param ref    the merged value
+    * @param ord    the ordinal the merger reports for this value
+    * @param offset the running total of value lengths, including {@code ref}
+    * @throws IOException if the consumer fails to store the value
+    */
+   void consume(BytesRef ref, int ord, long offset) throws IOException;
+ }
+
+ /**
+  * {@link BytesRefConsumer} that writes the bytes of every consumed value
+  * to an {@link IndexOutput}. The ordinal and offset arguments are ignored.
+  */
+ public static final class IndexOutputBytesRefConsumer implements BytesRefConsumer {
+   private final IndexOutput datOut;
+
+   /**
+    * @param datOut the output every consumed value is written to
+    */
+   public IndexOutputBytesRefConsumer(IndexOutput datOut) {
+     this.datOut = datOut;
+   }
+
+   @Override
+   public void consume(BytesRef currentMergedBytes, int ord, long offset) throws IOException {
+     final byte[] bytes = currentMergedBytes.bytes;
+     final int start = currentMergedBytes.offset;
+     final int length = currentMergedBytes.length;
+     datOut.writeBytes(bytes, start, length);
+   }
+ }
private static final class RecordMerger {
private final MergeQueue queue;
@@ -241,22 +251,22 @@ public final class SortedBytesMergeUtils
/* the currently merged relative ordinal */
int relativeOrd = -1;
- SortedSourceSlice(int readerIdx, SortedSource source, MergeState state,
+ SortedSourceSlice(int readerIdx, SortedSource source, int[] docBase, int mergeDocCount,
int[] docToOrd) {
super();
this.readerIdx = readerIdx;
this.source = source;
this.docIDToRelativeOrd = docToOrd;
this.ordMapping = new int[source.getValueCount()];
- this.docToOrdStart = state.docBase[readerIdx];
- this.docToOrdEnd = this.docToOrdStart + numDocs(state, readerIdx);
+ this.docToOrdStart = docBase[readerIdx];
+ this.docToOrdEnd = this.docToOrdStart + numDocs(docBase, mergeDocCount, readerIdx);
}
- private static int numDocs(MergeState state, int readerIndex) {
- if (readerIndex == state.docBase.length - 1) {
- return state.mergedDocCount - state.docBase[readerIndex];
+ private static int numDocs(int[] docBase, int mergedDocCount, int readerIndex) {
+ if (readerIndex == docBase.length - 1) {
+ return mergedDocCount - docBase[readerIndex];
}
- return state.docBase[readerIndex + 1] - state.docBase[readerIndex];
+ return docBase[readerIndex + 1] - docBase[readerIndex];
}
BytesRef next() {
@@ -269,6 +279,16 @@ public final class SortedBytesMergeUtils
}
return null;
}
+
+ /**
+  * Fills this slice's document range of {@code docToOrd}, from
+  * {@code docToOrdStart} inclusive to {@code docToOrdEnd} exclusive. For
+  * each document, the relative ordinal in {@code docIDToRelativeOrd} is
+  * looked up in {@code ordMapping} and the stored value minus one is written.
+  *
+  * @param docToOrd target array, indexed by document ID
+  * @return the same {@code docToOrd} array
+  */
+ public int[] toAbsolutOrds(int[] docToOrd) {
+   int doc = docToOrdStart;
+   while (doc < docToOrdEnd) {
+     final int sliceOrd = docIDToRelativeOrd[doc];
+     assert sliceOrd < ordMapping.length;
+     // ordMapping entries are read as "ordinal + 1"; the assert treats a
+     // value <= 0 as an ordinal that was never referenced.
+     final int mappedPlusOne = ordMapping[sliceOrd];
+     assert mappedPlusOne > 0 : "illegal mapping ord maps to an unreferenced value";
+     docToOrd[doc] = mappedPlusOne - 1;
+     doc++;
+   }
+   return docToOrd;
+ }
public void writeOrds(PackedInts.Writer writer) throws IOException {
for (int i = docToOrdStart; i < docToOrdEnd; i++) {
Modified: lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/BufferedIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/BufferedIndexInput.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/BufferedIndexInput.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/BufferedIndexInput.java Sun Jan 8 20:20:52 2012
@@ -17,6 +17,7 @@ package org.apache.lucene.store;
* limitations under the License.
*/
+import java.io.EOFException;
import java.io.IOException;
/** Base implementation class for buffered {@link IndexInput}. */
@@ -138,7 +139,7 @@ public abstract class BufferedIndexInput
if(bufferLength<len){
// Throw an exception when refill() could not read len bytes:
System.arraycopy(buffer, 0, b, offset, bufferLength);
- throw new IOException("read past EOF");
+ throw new EOFException("read past EOF: " + this);
} else {
System.arraycopy(buffer, 0, b, offset, len);
bufferPosition=len;
@@ -153,7 +154,7 @@ public abstract class BufferedIndexInput
// had in the buffer.
long after = bufferStart+bufferPosition+len;
if(after > length())
- throw new IOException("read past EOF");
+ throw new EOFException("read past EOF: " + this);
readInternal(b, offset, len);
bufferStart = after;
bufferPosition = 0;
@@ -231,7 +232,7 @@ public abstract class BufferedIndexInput
end = length();
int newLength = (int)(end - start);
if (newLength <= 0)
- throw new IOException("read past EOF");
+ throw new EOFException("read past EOF: " + this);
if (buffer == null) {
newBuffer(new byte[bufferSize]); // allocate buffer lazily
Modified: lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java Sun Jan 8 20:20:52 2012
@@ -100,11 +100,11 @@ public final class ByteArrayDataInput ex
@Override
public int readVInt() {
- checkBounds();
+ assert checkBounds();
byte b = bytes[pos++];
int i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
- checkBounds();
+ assert checkBounds();
b = bytes[pos++];
i |= (b & 0x7F) << shift;
}
@@ -113,11 +113,11 @@ public final class ByteArrayDataInput ex
@Override
public long readVLong() {
- checkBounds();
+ assert checkBounds();
byte b = bytes[pos++];
long i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
- checkBounds();
+ assert checkBounds();
b = bytes[pos++];
i |= (b & 0x7FL) << shift;
}
@@ -127,7 +127,7 @@ public final class ByteArrayDataInput ex
// NOTE: AIOOBE not EOF if you read too much
@Override
public byte readByte() {
- checkBounds();
+ assert checkBounds();
return bytes[pos++];
}
@@ -140,7 +140,6 @@ public final class ByteArrayDataInput ex
}
private boolean checkBounds() {
- assert pos < limit;
- return true;
+ return pos < limit;
}
}
Modified: lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java Sun Jan 8 20:20:52 2012
@@ -216,7 +216,7 @@ public final class CompoundFileDirectory
final String id = IndexFileNames.stripSegmentName(name);
final FileEntry entry = entries.get(id);
if (entry == null) {
- throw new IOException("No sub-file with id " + id + " found (fileName=" + name + " files: " + entries.keySet() + ")");
+ throw new FileNotFoundException("No sub-file with id " + id + " found (fileName=" + name + " files: " + entries.keySet() + ")");
}
return handle.openSlice(name, entry.offset, entry.length);
}
@@ -310,7 +310,7 @@ public final class CompoundFileDirectory
final String id = IndexFileNames.stripSegmentName(name);
final FileEntry entry = entries.get(id);
if (entry == null) {
- throw new IOException("No sub-file with id " + id + " found (fileName=" + name + " files: " + entries.keySet() + ")");
+ throw new FileNotFoundException("No sub-file with id " + id + " found (fileName=" + name + " files: " + entries.keySet() + ")");
}
return new IndexInputSlicer() {
@Override
Modified: lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/DataInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/DataInput.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/DataInput.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/DataInput.java Sun Jan 8 20:20:52 2012
@@ -21,6 +21,8 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
+import org.apache.lucene.util.IOUtils;
+
/**
* Abstract base class for performing read operations of Lucene's low-level
* data types.
@@ -166,7 +168,7 @@ public abstract class DataInput implemen
int length = readVInt();
final byte[] bytes = new byte[length];
readBytes(bytes, 0, length);
- return new String(bytes, 0, length, "UTF-8");
+ return new String(bytes, 0, length, IOUtils.CHARSET_UTF_8);
}
/** Returns a clone of this stream.
Modified: lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/Directory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/Directory.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/Directory.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/Directory.java Sun Jan 8 20:20:52 2012
@@ -17,6 +17,7 @@ package org.apache.lucene.store;
* limitations under the License.
*/
+import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Closeable;
@@ -305,7 +306,7 @@ public abstract class Directory implemen
protected void readInternal(byte[] b, int offset, int len) throws IOException {
long start = getFilePointer();
if(start + len > length)
- throw new IOException("read past EOF");
+ throw new EOFException("read past EOF: " + this);
base.seek(fileOffset + start);
base.readBytes(b, offset, len, false);
}
@@ -338,7 +339,7 @@ public abstract class Directory implemen
if (numBytes > 0) {
long start = getFilePointer();
if (start + numBytes > length) {
- throw new IOException("read past EOF");
+ throw new EOFException("read past EOF: " + this);
}
base.seek(fileOffset + start);
base.copyBytes(out, numBytes);
Modified: lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/MMapDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/MMapDirectory.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/MMapDirectory.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/MMapDirectory.java Sun Jan 8 20:20:52 2012
@@ -17,6 +17,7 @@ package org.apache.lucene.store;
* limitations under the License.
*/
+import java.io.EOFException;
import java.io.IOException;
import java.io.File;
import java.io.RandomAccessFile;
@@ -303,7 +304,7 @@ public class MMapDirectory extends FSDir
do {
curBufIndex++;
if (curBufIndex >= buffers.length) {
- throw new IOException("read past EOF: " + this);
+ throw new EOFException("read past EOF: " + this);
}
curBuf = buffers[curBufIndex];
curBuf.position(0);
@@ -326,7 +327,7 @@ public class MMapDirectory extends FSDir
offset += curAvail;
curBufIndex++;
if (curBufIndex >= buffers.length) {
- throw new IOException("read past EOF: " + this);
+ throw new EOFException("read past EOF: " + this);
}
curBuf = buffers[curBufIndex];
curBuf.position(0);
@@ -394,12 +395,12 @@ public class MMapDirectory extends FSDir
if (pos < 0L) {
throw new IllegalArgumentException("Seeking to negative position: " + this);
}
- throw new IOException("seek past EOF");
+ throw new EOFException("seek past EOF: " + this);
} catch (IllegalArgumentException iae) {
if (pos < 0L) {
throw new IllegalArgumentException("Seeking to negative position: " + this);
}
- throw new IOException("seek past EOF: " + this);
+ throw new EOFException("seek past EOF: " + this);
} catch (NullPointerException npe) {
throw new AlreadyClosedException("MMapIndexInput already closed: " + this);
}
Modified: lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/NIOFSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/NIOFSDirectory.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/NIOFSDirectory.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/NIOFSDirectory.java Sun Jan 8 20:20:52 2012
@@ -182,7 +182,7 @@ public class NIOFSDirectory extends FSDi
long pos = getFilePointer() + off;
if (pos + len > end) {
- throw new EOFException("read past EOF (resource: " + this + ")");
+ throw new EOFException("read past EOF: " + this);
}
try {
Modified: lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/RAMInputStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/RAMInputStream.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/RAMInputStream.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/RAMInputStream.java Sun Jan 8 20:20:52 2012
@@ -91,7 +91,7 @@ public class RAMInputStream extends Inde
if (currentBufferIndex >= file.numBuffers()) {
// end of file reached, no more buffers left
if (enforceEOF) {
- throw new EOFException("Read past EOF (resource: " + this + ")");
+ throw new EOFException("read past EOF: " + this);
} else {
// Force EOF if a read takes place at this position
currentBufferIndex--;
Modified: lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/SimpleFSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/SimpleFSDirectory.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/SimpleFSDirectory.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/store/SimpleFSDirectory.java Sun Jan 8 20:20:52 2012
@@ -17,6 +17,7 @@ package org.apache.lucene.store;
* limitations under the License.
*/
+import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
@@ -145,7 +146,7 @@ public class SimpleFSDirectory extends F
int total = 0;
if (position + len > end) {
- throw new IOException("read past EOF: " + this);
+ throw new EOFException("read past EOF: " + this);
}
try {
Modified: lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/util/fst/Builder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/util/fst/Builder.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/util/fst/Builder.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/util/fst/Builder.java Sun Jan 8 20:20:52 2012
@@ -165,8 +165,8 @@ public class Builder<T> {
}
/** Pass false to disable the array arc optimization
- * while building the FST. This is necessary if
- * encoding a single arc may take more than 255 bytes. */
+ * while building the FST; this will make the resulting
+ * FST smaller but slower to traverse. */
public void setAllowArrayArcs(boolean b) {
fst.setAllowArrayArcs(b);
}
Modified: lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/util/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/util/fst/FST.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/util/fst/FST.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/java/org/apache/lucene/util/fst/FST.java Sun Jan 8 20:20:52 2012
@@ -92,7 +92,10 @@ public class FST<T> {
/** Changed numBytesPerArc for array'd case from byte to int. */
private final static int VERSION_INT_NUM_BYTES_PER_ARC = 1;
- private final static int VERSION_CURRENT = VERSION_INT_NUM_BYTES_PER_ARC;
+ /** Write BYTE2 labels as 2-byte short, not vInt. */
+ private final static int VERSION_SHORT_BYTE2_LABELS = 2;
+
+ private final static int VERSION_CURRENT = VERSION_SHORT_BYTE2_LABELS;
// Never serialized; just used to represent the virtual
// final node w/ no arcs:
@@ -199,7 +202,9 @@ public class FST<T> {
public FST(DataInput in, Outputs<T> outputs) throws IOException {
this.outputs = outputs;
writer = null;
- CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_INT_NUM_BYTES_PER_ARC, VERSION_INT_NUM_BYTES_PER_ARC);
+ // NOTE: only reads most recent format; we don't have
+ // back-compat promise for FSTs (they are experimental):
+ CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_SHORT_BYTE2_LABELS, VERSION_SHORT_BYTE2_LABELS);
if (in.readByte() == 1) {
// accepts empty string
int numBytes = in.readVInt();
@@ -389,7 +394,7 @@ public class FST<T> {
writer.writeByte((byte) v);
} else if (inputType == INPUT_TYPE.BYTE2) {
assert v <= 65535: "v=" + v;
- writer.writeVInt(v);
+ writer.writeShort((short) v);
} else {
//writeInt(v);
writer.writeVInt(v);
@@ -399,7 +404,11 @@ public class FST<T> {
int readLabel(DataInput in) throws IOException {
final int v;
if (inputType == INPUT_TYPE.BYTE1) {
+ // Unsigned byte:
v = in.readByte()&0xFF;
+ } else if (inputType == INPUT_TYPE.BYTE2) {
+ // Unsigned short:
+ v = in.readShort()&0xFFFF;
} else {
v = in.readVInt();
}
Modified: lucene/dev/branches/lucene3305/lucene/src/site/build/site/contributions.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/site/build/site/contributions.html?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/site/build/site/contributions.html (original)
+++ lucene/dev/branches/lucene3305/lucene/src/site/build/site/contributions.html Sun Jan 8 20:20:52 2012
@@ -199,6 +199,9 @@ document.write("Last Published: " + docu
<div class="menuitem">
<a href="http://wiki.apache.org/lucene-java">Wiki</a>
</div>
+<div class="menuitem">
+<a href="http://lucene.apache.org/privacy.html">Privacy Policy</a>
+</div>
</div>
<div id="credit"></div>
<div id="roundbottom">
@@ -727,5 +730,18 @@ document.write("Last Published: " + docu
|end bottomstrip
+-->
</div>
+<script type="text/javascript">
+
+ var _gaq = _gaq || [];
+ _gaq.push(['_setAccount', 'UA-94576-12']);
+ _gaq.push(['_trackPageview']);
+
+ (function() {
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+ })();
+
+</script>
</body>
</html>
Modified: lucene/dev/branches/lucene3305/lucene/src/site/build/site/demo.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/site/build/site/demo.html?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/site/build/site/demo.html (original)
+++ lucene/dev/branches/lucene3305/lucene/src/site/build/site/demo.html Sun Jan 8 20:20:52 2012
@@ -199,6 +199,9 @@ document.write("Last Published: " + docu
<div class="menuitem">
<a href="http://wiki.apache.org/lucene-java">Wiki</a>
</div>
+<div class="menuitem">
+<a href="http://lucene.apache.org/privacy.html">Privacy Policy</a>
+</div>
</div>
<div id="credit"></div>
<div id="roundbottom">
@@ -342,5 +345,18 @@ document.write("Last Published: " + docu
|end bottomstrip
+-->
</div>
+<script type="text/javascript">
+
+ var _gaq = _gaq || [];
+ _gaq.push(['_setAccount', 'UA-94576-12']);
+ _gaq.push(['_trackPageview']);
+
+ (function() {
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+ })();
+
+</script>
</body>
</html>
Modified: lucene/dev/branches/lucene3305/lucene/src/site/build/site/demo2.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/site/build/site/demo2.html?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/site/build/site/demo2.html (original)
+++ lucene/dev/branches/lucene3305/lucene/src/site/build/site/demo2.html Sun Jan 8 20:20:52 2012
@@ -199,6 +199,9 @@ document.write("Last Published: " + docu
<div class="menuitem">
<a href="http://wiki.apache.org/lucene-java">Wiki</a>
</div>
+<div class="menuitem">
+<a href="http://lucene.apache.org/privacy.html">Privacy Policy</a>
+</div>
</div>
<div id="credit"></div>
<div id="roundbottom">
@@ -392,5 +395,18 @@ document.write("Last Published: " + docu
|end bottomstrip
+-->
</div>
+<script type="text/javascript">
+
+ var _gaq = _gaq || [];
+ _gaq.push(['_setAccount', 'UA-94576-12']);
+ _gaq.push(['_trackPageview']);
+
+ (function() {
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+ })();
+
+</script>
</body>
</html>
Modified: lucene/dev/branches/lucene3305/lucene/src/site/build/site/fileformats.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/site/build/site/fileformats.html?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/site/build/site/fileformats.html (original)
+++ lucene/dev/branches/lucene3305/lucene/src/site/build/site/fileformats.html Sun Jan 8 20:20:52 2012
@@ -199,6 +199,9 @@ document.write("Last Published: " + docu
<div class="menuitem">
<a href="http://wiki.apache.org/lucene-java">Wiki</a>
</div>
+<div class="menuitem">
+<a href="http://lucene.apache.org/privacy.html">Privacy Policy</a>
+</div>
</div>
<div id="credit"></div>
<div id="roundbottom">
@@ -2623,5 +2626,18 @@ document.write("Last Published: " + docu
|end bottomstrip
+-->
</div>
+<script type="text/javascript">
+
+ var _gaq = _gaq || [];
+ _gaq.push(['_setAccount', 'UA-94576-12']);
+ _gaq.push(['_trackPageview']);
+
+ (function() {
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+ })();
+
+</script>
</body>
</html>
Modified: lucene/dev/branches/lucene3305/lucene/src/site/build/site/gettingstarted.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/site/build/site/gettingstarted.html?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/site/build/site/gettingstarted.html (original)
+++ lucene/dev/branches/lucene3305/lucene/src/site/build/site/gettingstarted.html Sun Jan 8 20:20:52 2012
@@ -199,6 +199,9 @@ document.write("Last Published: " + docu
<div class="menuitem">
<a href="http://wiki.apache.org/lucene-java">Wiki</a>
</div>
+<div class="menuitem">
+<a href="http://lucene.apache.org/privacy.html">Privacy Policy</a>
+</div>
</div>
<div id="credit"></div>
<div id="roundbottom">
@@ -281,5 +284,18 @@ document.write("Last Published: " + docu
|end bottomstrip
+-->
</div>
+<script type="text/javascript">
+
+ var _gaq = _gaq || [];
+ _gaq.push(['_setAccount', 'UA-94576-12']);
+ _gaq.push(['_trackPageview']);
+
+ (function() {
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+ })();
+
+</script>
</body>
</html>
Modified: lucene/dev/branches/lucene3305/lucene/src/site/build/site/index.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/site/build/site/index.html?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/site/build/site/index.html (original)
+++ lucene/dev/branches/lucene3305/lucene/src/site/build/site/index.html Sun Jan 8 20:20:52 2012
@@ -197,6 +197,9 @@ document.write("Last Published: " + docu
<div class="menuitem">
<a href="http://wiki.apache.org/lucene-java">Wiki</a>
</div>
+<div class="menuitem">
+<a href="http://lucene.apache.org/privacy.html">Privacy Policy</a>
+</div>
</div>
<div id="credit">
<hr>
@@ -251,5 +254,18 @@ document.write("Last Published: " + docu
|end bottomstrip
+-->
</div>
+<script type="text/javascript">
+
+ var _gaq = _gaq || [];
+ _gaq.push(['_setAccount', 'UA-94576-12']);
+ _gaq.push(['_trackPageview']);
+
+ (function() {
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+ })();
+
+</script>
</body>
</html>
Modified: lucene/dev/branches/lucene3305/lucene/src/site/build/site/linkmap.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/site/build/site/linkmap.html?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/site/build/site/linkmap.html (original)
+++ lucene/dev/branches/lucene3305/lucene/src/site/build/site/linkmap.html Sun Jan 8 20:20:52 2012
@@ -197,6 +197,9 @@ document.write("Last Published: " + docu
<div class="menuitem">
<a href="http://wiki.apache.org/lucene-java">Wiki</a>
</div>
+<div class="menuitem">
+<a href="http://lucene.apache.org/privacy.html">Privacy Policy</a>
+</div>
</div>
<div id="credit"></div>
<div id="roundbottom">
@@ -421,6 +424,12 @@ document.write("Last Published: " + docu
<a href="http://wiki.apache.org/lucene-java">Wiki</a> ___________________ <em>wiki</em>
</li>
</ul>
+
+<ul>
+<li>
+<a href="http://lucene.apache.org/privacy.html">Privacy Policy</a> ___________________ <em>privacy</em>
+</li>
+</ul>
</ul>
</ul>
@@ -455,5 +464,18 @@ document.write("Last Published: " + docu
|end bottomstrip
+-->
</div>
+<script type="text/javascript">
+
+ var _gaq = _gaq || [];
+ _gaq.push(['_setAccount', 'UA-94576-12']);
+ _gaq.push(['_trackPageview']);
+
+ (function() {
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+ })();
+
+</script>
</body>
</html>
Modified: lucene/dev/branches/lucene3305/lucene/src/site/build/site/lucene-contrib/index.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/site/build/site/lucene-contrib/index.html?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/site/build/site/lucene-contrib/index.html (original)
+++ lucene/dev/branches/lucene3305/lucene/src/site/build/site/lucene-contrib/index.html Sun Jan 8 20:20:52 2012
@@ -199,6 +199,9 @@ document.write("Last Published: " + docu
<div class="menuitem">
<a href="http://wiki.apache.org/lucene-java">Wiki</a>
</div>
+<div class="menuitem">
+<a href="http://lucene.apache.org/privacy.html">Privacy Policy</a>
+</div>
</div>
<div id="credit">
<hr>
@@ -400,5 +403,18 @@ document.write("Last Published: " + docu
|end bottomstrip
+-->
</div>
+<script type="text/javascript">
+
+ var _gaq = _gaq || [];
+ _gaq.push(['_setAccount', 'UA-94576-12']);
+ _gaq.push(['_trackPageview']);
+
+ (function() {
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+ })();
+
+</script>
</body>
</html>
Modified: lucene/dev/branches/lucene3305/lucene/src/site/build/site/queryparsersyntax.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/site/build/site/queryparsersyntax.html?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/site/build/site/queryparsersyntax.html (original)
+++ lucene/dev/branches/lucene3305/lucene/src/site/build/site/queryparsersyntax.html Sun Jan 8 20:20:52 2012
@@ -199,6 +199,9 @@ document.write("Last Published: " + docu
<div class="menuitem">
<a href="http://wiki.apache.org/lucene-java">Wiki</a>
</div>
+<div class="menuitem">
+<a href="http://lucene.apache.org/privacy.html">Privacy Policy</a>
+</div>
</div>
<div id="credit"></div>
<div id="roundbottom">
@@ -507,5 +510,18 @@ document.write("Last Published: " + docu
|end bottomstrip
+-->
</div>
+<script type="text/javascript">
+
+ var _gaq = _gaq || [];
+ _gaq.push(['_setAccount', 'UA-94576-12']);
+ _gaq.push(['_trackPageview']);
+
+ (function() {
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+ })();
+
+</script>
</body>
</html>
Modified: lucene/dev/branches/lucene3305/lucene/src/site/build/site/scoring.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/site/build/site/scoring.html?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/site/build/site/scoring.html (original)
+++ lucene/dev/branches/lucene3305/lucene/src/site/build/site/scoring.html Sun Jan 8 20:20:52 2012
@@ -199,6 +199,9 @@ document.write("Last Published: " + docu
<div class="menuitem">
<a href="http://wiki.apache.org/lucene-java">Wiki</a>
</div>
+<div class="menuitem">
+<a href="http://lucene.apache.org/privacy.html">Privacy Policy</a>
+</div>
</div>
<div id="credit"></div>
<div id="roundbottom">
@@ -570,5 +573,18 @@ document.write("Last Published: " + docu
|end bottomstrip
+-->
</div>
+<script type="text/javascript">
+
+ var _gaq = _gaq || [];
+ _gaq.push(['_setAccount', 'UA-94576-12']);
+ _gaq.push(['_trackPageview']);
+
+ (function() {
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+ })();
+
+</script>
</body>
</html>
Modified: lucene/dev/branches/lucene3305/lucene/src/site/build/site/systemrequirements.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/site/build/site/systemrequirements.html?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/site/build/site/systemrequirements.html (original)
+++ lucene/dev/branches/lucene3305/lucene/src/site/build/site/systemrequirements.html Sun Jan 8 20:20:52 2012
@@ -197,6 +197,9 @@ document.write("Last Published: " + docu
<div class="menuitem">
<a href="http://wiki.apache.org/lucene-java">Wiki</a>
</div>
+<div class="menuitem">
+<a href="http://lucene.apache.org/privacy.html">Privacy Policy</a>
+</div>
</div>
<div id="credit"></div>
<div id="roundbottom">
@@ -270,5 +273,18 @@ document.write("Last Published: " + docu
|end bottomstrip
+-->
</div>
+<script type="text/javascript">
+
+ var _gaq = _gaq || [];
+ _gaq.push(['_setAccount', 'UA-94576-12']);
+ _gaq.push(['_trackPageview']);
+
+ (function() {
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+ })();
+
+</script>
</body>
</html>
Modified: lucene/dev/branches/lucene3305/lucene/src/site/src/documentation/content/xdocs/site.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/site/src/documentation/content/xdocs/site.xml?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/site/src/documentation/content/xdocs/site.xml (original)
+++ lucene/dev/branches/lucene3305/lucene/src/site/src/documentation/content/xdocs/site.xml Sun Jan 8 20:20:52 2012
@@ -78,6 +78,7 @@ See http://forrest.apache.org/docs/linki
<query-syntax label="Query Syntax" href="queryparsersyntax.html"/>
<scoring label="Scoring" href="scoring.html"/>
<wiki label="Wiki" href="ext:wiki" />
+ <privacy label="Privacy Policy" href="ext:privacy"/>
</docs>
<!--
@@ -113,7 +114,7 @@ See http://forrest.apache.org/docs/linki
<webapp href="docs/your-project.html#webapp"/>
<dtd-docs href="docs/dtd-docs.html"/>
</forrest>
-
+ <privacy href="http://lucene.apache.org/privacy.html"/>
<cocoon href="http://cocoon.apache.org/"/>
<xml.apache.org href="http://xml.apache.org/"/>
<issues href="http://issues.apache.org/jira/browse/LUCENE"/>
Modified: lucene/dev/branches/lucene3305/lucene/src/site/src/documentation/skins/lucene/xslt/html/site-to-xhtml.xsl
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/site/src/documentation/skins/lucene/xslt/html/site-to-xhtml.xsl?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/site/src/documentation/skins/lucene/xslt/html/site-to-xhtml.xsl (original)
+++ lucene/dev/branches/lucene3305/lucene/src/site/src/documentation/skins/lucene/xslt/html/site-to-xhtml.xsl Sun Jan 8 20:20:52 2012
@@ -374,6 +374,19 @@ footer, searchbar, css etc. As input, i
|end bottomstrip
+</xsl:comment>
</div>
+ <script type="text/javascript">
+
+ var _gaq = _gaq || [];
+ _gaq.push(['_setAccount', 'UA-94576-12']);
+ _gaq.push(['_trackPageview']);
+
+ (function() {
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+ })();
+
+</script>
</body>
</html>
</xsl:template>
Modified: lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/index/TestDocValuesIndexing.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/index/TestDocValuesIndexing.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/index/TestDocValuesIndexing.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/index/TestDocValuesIndexing.java Sun Jan 8 20:20:52 2012
@@ -21,8 +21,14 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
+import java.util.Comparator;
import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
@@ -33,6 +39,7 @@ import org.apache.lucene.document.String
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
@@ -47,6 +54,7 @@ import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
@@ -539,6 +547,7 @@ public class TestDocValuesIndexing exten
return MultiDocValues.getDocValues(reader, field);
}
+ @SuppressWarnings("fallthrough")
private Source getSource(DocValues values) throws IOException {
// getSource uses cache internally
switch(random.nextInt(5)) {
@@ -547,7 +556,9 @@ public class TestDocValuesIndexing exten
case 2:
return values.getDirectSource();
case 1:
- return values.getSource();
+ if(values.type() == Type.BYTES_VAR_SORTED || values.type() == Type.BYTES_FIXED_SORTED) {
+ return values.getSource().asSortedSource();
+ }
default:
return values.getSource();
}
@@ -705,4 +716,100 @@ public class TestDocValuesIndexing exten
r.close();
d.close();
}
+
+ public void testSortedBytes() throws IOException { // for each sorted bytes type: index random values, then check ord order and per-document values
+ Type[] types = new Type[] { Type.BYTES_FIXED_SORTED, Type.BYTES_VAR_SORTED };
+ for (Type type : types) {
+ boolean fixed = type == Type.BYTES_FIXED_SORTED;
+ final Directory d = newDirectory();
+ IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT,
+ new MockAnalyzer(random));
+ IndexWriter w = new IndexWriter(d, cfg);
+ Comparator<BytesRef> comp = BytesRef.getUTF8SortedAsUnicodeComparator();
+ int numDocs = atLeast(100);
+ BytesRefHash hash = new BytesRefHash(); // every value written is added here; sorted below to obtain the expected ord order
+ Map<String, String> docToString = new HashMap<String, String>(); // "id" field text -> value written for that document
+ int len = 1 + random.nextInt(50); // in [1, 50]; passed as the length argument to both random-string helpers
+ for (int i = 0; i < numDocs; i++) { // first batch: ids "0" .. "numDocs-1", each with a value
+ Document doc = new Document();
+ doc.add(newField("id", "" + i, TextField.TYPE_STORED));
+ DocValuesField f = new DocValuesField("field");
+ String string =fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random,
+ len) : _TestUtil.randomRealisticUnicodeString(random, 1, len);
+ hash.add(new BytesRef(string));
+ docToString.put("" + i, string);
+
+ f.setBytes(new BytesRef(string), type, comp);
+ doc.add(f);
+ w.addDocument(doc);
+ }
+ if (rarely()) {
+ w.commit();
+ }
+ int numDocsNoValue = atLeast(10);
+ for (int i = 0; i < numDocsNoValue; i++) { // documents that carry no "field" value at all
+ Document doc = new Document();
+ doc.add(newField("id", "noValue", TextField.TYPE_STORED)); // shared id; not put in docToString, so never looked up through docId()
+ w.addDocument(doc);
+ }
+ BytesRef bytesRef = new BytesRef(fixed ? len : 0);
+ bytesRef.offset = 0;
+ bytesRef.length = fixed ? len : 0; // expected default value: len bytes for the fixed type, zero-length for the var type
+ hash.add(bytesRef); // add empty value for the gaps
+ if (rarely()) {
+ w.commit();
+ }
+ for (int i = 0; i < numDocs; i++) { // second batch, written after the value-less documents
+ Document doc = new Document();
+ String id = "" + i + numDocs; // string concatenation (digits of i followed by digits of numDocs), not a sum; always longer than any first-batch id, so ids stay unique
+ doc.add(newField("id", id, TextField.TYPE_STORED));
+ DocValuesField f = new DocValuesField("field");
+ String string = fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random,
+ len) : _TestUtil.randomRealisticUnicodeString(random, 1, len);
+ hash.add(new BytesRef(string));
+ docToString.put(id, string);
+ f.setBytes(new BytesRef(string), type, comp);
+ doc.add(f);
+ w.addDocument(doc);
+ }
+ w.commit();
+ IndexReader reader = w.getReader();
+ DocValues docValues = MultiDocValues.getDocValues(reader, "field");
+ Source source = getSource(docValues); // randomly picks one of the source variants
+ SortedSource asSortedSource = source.asSortedSource();
+ int[] sort = hash.sort(comp); // sort[i] is used below as the hash id of the value expected at ord i
+ BytesRef expected = new BytesRef();
+ BytesRef actual = new BytesRef();
+ assertEquals(hash.size(), asSortedSource.getValueCount());
+ for (int i = 0; i < hash.size(); i++) { // ord -> value and value -> ord must both agree with the sorted hash
+ hash.get(sort[i], expected);
+ asSortedSource.getByOrd(i, actual);
+ assertEquals(expected.utf8ToString(), actual.utf8ToString());
+ int ord = asSortedSource.getByValue(expected, actual);
+ assertEquals(i, ord);
+ }
+ reader = new SlowMultiReaderWrapper(reader); // NOTE(review): presumably so docId() resolves terms against a single composite view - confirm
+ Set<Entry<String, String>> entrySet = docToString.entrySet();
+
+ for (Entry<String, String> entry : entrySet) { // every document that was given a value must return exactly that value
+ int docId = docId(reader, new Term("id", entry.getKey()));
+ expected.copyChars(entry.getValue());
+ assertEquals(expected, asSortedSource.getBytes(docId, actual));
+ }
+
+ reader.close();
+ w.close();
+ d.close();
+ }
+ }
+
+ public int docId(IndexReader reader, Term term) throws IOException { // returns the id of the single document containing term; asserts there is exactly one
+ int docFreq = reader.docFreq(term);
+ assertEquals(1, docFreq); // the term must occur in exactly one document
+ DocsEnum termDocsEnum = reader.termDocsEnum(null, term.field, term.bytes, false); // NOTE(review): null and false are presumably "no liveDocs filter" and "freqs not needed" - confirm against the termDocsEnum signature
+ int nextDoc = termDocsEnum.nextDoc();
+ assertEquals(DocsEnum.NO_MORE_DOCS, termDocsEnum.nextDoc()); // the enum must be exhausted after the first document
+ return nextDoc;
+
+ }
}
Modified: lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/index/TestTypePromotion.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/index/TestTypePromotion.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/index/TestTypePromotion.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/index/TestTypePromotion.java Sun Jan 8 20:20:52 2012
@@ -100,14 +100,12 @@ public class TestTypePromotion extends L
randomValueType(types, random), values, num_1 + num_2, num_3);
writer_2.commit();
writer_2.close();
- if (random.nextBoolean()) {
+ if (rarely()) {
writer.addIndexes(dir_2);
} else {
// do a real merge here
IndexReader open = IndexReader.open(dir_2);
- // we cannot use SlowMR for sorted bytes, because it returns a null sortedsource
- boolean useSlowMRWrapper = types != SORTED_BYTES && random.nextBoolean();
- writer.addIndexes(useSlowMRWrapper ? new SlowMultiReaderWrapper(open) : open);
+ writer.addIndexes(new SlowMultiReaderWrapper(open));
open.close();
}
dir_2.close();
Modified: lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/search/TestSort.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/search/TestSort.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/search/TestSort.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/search/TestSort.java Sun Jan 8 20:20:52 2012
@@ -258,7 +258,7 @@ public class TestSort extends LuceneTest
//System.out.println(writer.getSegmentCount());
writer.close();
IndexReader reader = IndexReader.open(indexStore);
- return new IndexSearcher (reader);
+ return newSearcher(reader);
}
public String getRandomNumberString(int num, int low, int high) {
@@ -1210,35 +1210,11 @@ public class TestSort extends LuceneTest
assertMatches( null, searcher, query, sort, expectedResult );
}
- private static boolean hasSlowMultiReaderWrapper(IndexReader r) {
- if (r instanceof SlowMultiReaderWrapper) {
- return true;
- } else {
- IndexReader[] subReaders = r.getSequentialSubReaders();
- if (subReaders != null) {
- for (IndexReader subReader : subReaders) {
- if (hasSlowMultiReaderWrapper(subReader)) {
- return true;
- }
- }
- }
- }
- return false;
- }
// make sure the documents returned by the search match the expected list
private void assertMatches(String msg, IndexSearcher searcher, Query query, Sort sort,
String expectedResult) throws IOException {
- for(SortField sortField : sort.getSort()) {
- if (sortField.getUseIndexValues() && sortField.getType() == SortField.Type.STRING) {
- if (hasSlowMultiReaderWrapper(searcher.getIndexReader())) {
- // Cannot use STRING DocValues sort with SlowMultiReaderWrapper
- return;
- }
- }
- }
-
//ScoreDoc[] result = searcher.search (query, null, 1000, sort).scoreDocs;
TopDocs hits = searcher.search(query, null, Math.max(1, expectedResult.length()), sort);
ScoreDoc[] result = hits.scoreDocs;
Modified: lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/util/TestByteBlockPool.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/util/TestByteBlockPool.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/util/TestByteBlockPool.java (original)
+++ lucene/dev/branches/lucene3305/lucene/src/test/org/apache/lucene/util/TestByteBlockPool.java Sun Jan 8 20:20:52 2012
@@ -1,5 +1,6 @@
package org.apache.lucene.util;
+import java.io.EOFException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -59,7 +60,7 @@ public class TestByteBlockPool extends L
try {
input.readByte();
fail("must be EOF");
- } catch (IOException e) {
+ } catch (EOFException e) {
// expected - read past EOF
}
dir.close();
Modified: lucene/dev/branches/lucene3305/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java?rev=1228941&r1=1228940&r2=1228941&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java Sun Jan 8 20:20:52 2012
@@ -153,12 +153,15 @@ public final class SynonymFilter extends
// Holds pending output synonyms for one future position:
private static class PendingOutputs {
CharsRef[] outputs;
+ int[] endOffsets;
int upto;
int count;
int posIncr = 1;
+ int lastEndOffset;
public PendingOutputs() {
outputs = new CharsRef[1];
+ endOffsets = new int[1];
}
public void reset() {
@@ -168,6 +171,7 @@ public final class SynonymFilter extends
public CharsRef pullNext() {
assert upto < count;
+ lastEndOffset = endOffsets[upto];
final CharsRef result = outputs[upto++];
posIncr = 0;
if (upto == count) {
@@ -176,16 +180,29 @@ public final class SynonymFilter extends
return result;
}
- public void add(char[] output, int offset, int len) {
+ public int getLastEndOffset() { // endOffset stored for the output most recently returned by pullNext(); may be -1
+ return lastEndOffset;
+ }
+
+ public void add(char[] output, int offset, int len, int endOffset) { // stores a copy of output[offset, offset+len) and its endOffset in slot count
if (count == outputs.length) {
final CharsRef[] next = new CharsRef[ArrayUtil.oversize(1+count, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
System.arraycopy(outputs, 0, next, 0, count);
outputs = next;
}
+ if (count == endOffsets.length) { // grow endOffsets when full; its capacity is checked and sized independently of outputs
+ final int[] next = new int[ArrayUtil.oversize(1+count, RamUsageEstimator.NUM_BYTES_INT)];
+ System.arraycopy(endOffsets, 0, next, 0, count);
+ endOffsets = next;
+ }
if (outputs[count] == null) {
outputs[count] = new CharsRef();
}
outputs[count].copyChars(output, offset, len);
+ // endOffset can be -1, in which case we should simply
+ // use the endOffset of the input token, or X >= 0, in
+ // which case we use X as the endOffset for this output
+ endOffsets[count] = endOffset; // same index as outputs[count], so pullNext() can read both with one cursor
+ count++;
}
};
@@ -281,6 +298,7 @@ public final class SynonymFilter extends
// Holds the longest match we've seen so far:
BytesRef matchOutput = null;
int matchInputLength = 0;
+ int matchEndOffset = -1;
BytesRef pendingOutput = fst.outputs.getNoOutput();
fst.getFirstArc(scratchArc);
@@ -297,6 +315,8 @@ public final class SynonymFilter extends
final int bufferLen;
//System.out.println(" cycle nextRead=" + curNextRead + " nextWrite=" + nextWrite);
+ int inputEndOffset = 0;
+
if (curNextRead == nextWrite) {
// We used up our lookahead buffer of input tokens
@@ -317,6 +337,7 @@ public final class SynonymFilter extends
final PendingInput input = futureInputs[nextWrite];
input.startOffset = offsetAtt.startOffset();
input.endOffset = offsetAtt.endOffset();
+ inputEndOffset = input.endOffset;
//System.out.println(" new token=" + new String(buffer, 0, bufferLen));
if (nextRead != nextWrite) {
capture();
@@ -335,6 +356,7 @@ public final class SynonymFilter extends
// Still in our lookahead
buffer = futureInputs[curNextRead].term.chars;
bufferLen = futureInputs[curNextRead].term.length;
+ inputEndOffset = futureInputs[curNextRead].endOffset;
//System.out.println(" old token=" + new String(buffer, 0, bufferLen));
}
@@ -360,6 +382,7 @@ public final class SynonymFilter extends
if (scratchArc.isFinal()) {
matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput);
matchInputLength = tokenCount;
+ matchEndOffset = inputEndOffset;
//System.out.println(" found matchLength=" + matchInputLength + " output=" + matchOutput);
}
@@ -390,7 +413,7 @@ public final class SynonymFilter extends
if (matchOutput != null) {
//System.out.println(" add matchLength=" + matchInputLength + " output=" + matchOutput);
inputSkipCount = matchInputLength;
- addOutput(matchOutput, matchInputLength);
+ addOutput(matchOutput, matchInputLength, matchEndOffset);
} else if (nextRead != nextWrite) {
// Even though we had no match here, we set to 1
// because we need to skip current input token before
@@ -404,7 +427,7 @@ public final class SynonymFilter extends
}
// Interleaves all output tokens onto the futureOutputs:
- private void addOutput(BytesRef bytes, int matchInputLength) {
+ private void addOutput(BytesRef bytes, int matchInputLength, int matchEndOffset) {
bytesReader.reset(bytes.bytes, bytes.offset, bytes.length);
final int code = bytesReader.readVInt();
@@ -425,7 +448,21 @@ public final class SynonymFilter extends
// Caller is not allowed to have empty string in
// the output:
assert outputLen > 0: "output contains empty string: " + scratchChars;
- futureOutputs[outputUpto].add(scratchChars.chars, lastStart, outputLen);
+ final int endOffset;
+ if (chIDX == chEnd && lastStart == scratchChars.offset) {
+ // This rule had a single output token, so, we set
+ // this output's endOffset to the current
+ // endOffset (ie, endOffset of the last input
+ // token it matched):
+ endOffset = matchEndOffset;
+ } else {
+ // This rule has more than one output token; we
+ // can't pick any particular endOffset for this
+ // case, so, we inherit the endOffset for the
+ // input token which this output overlaps:
+ endOffset = -1;
+ }
+ futureOutputs[outputUpto].add(scratchChars.chars, lastStart, outputLen, endOffset);
//System.out.println(" " + new String(scratchChars.chars, lastStart, outputLen) + " outputUpto=" + outputUpto);
lastStart = 1+chIDX;
//System.out.println(" slot=" + outputUpto + " keepOrig=" + keepOrig);
@@ -507,7 +544,11 @@ public final class SynonymFilter extends
clearAttributes();
termAtt.copyBuffer(output.chars, output.offset, output.length);
typeAtt.setType(TYPE_SYNONYM);
- offsetAtt.setOffset(input.startOffset, input.endOffset);
+ int endOffset = outputs.getLastEndOffset();
+ if (endOffset == -1) {
+ endOffset = input.endOffset;
+ }
+ offsetAtt.setOffset(input.startOffset, endOffset);
posIncrAtt.setPositionIncrement(posIncr);
if (outputs.count == 0) {
// Done with the buffered input and all outputs at