You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/01/15 01:51:19 UTC
svn commit: r1433250 - in
/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene:
codecs/lucene41/ util/fst/
Author: rmuir
Date: Tue Jan 15 00:51:19 2013
New Revision: 1433250
URL: http://svn.apache.org/viewvc?rev=1433250&view=rev
Log:
add in-ram sortedbytes
Modified:
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesConsumer.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesProducer.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java?rev=1433250&r1=1433249&r2=1433250&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java Tue Jan 15 00:51:19 2013
@@ -144,7 +144,7 @@ public class Lucene41Codec extends Codec
private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
// nocommit
- private final SimpleDocValuesFormat defaultDVFormat = SimpleDocValuesFormat.forName("Disk");
+ private final SimpleDocValuesFormat defaultDVFormat = SimpleDocValuesFormat.forName("Lucene41");
private final SimpleNormsFormat simpleNormsFormat = new Lucene41SimpleNormsFormat();
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesConsumer.java?rev=1433250&r1=1433249&r2=1433250&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesConsumer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesConsumer.java Tue Jan 15 00:51:19 2013
@@ -30,6 +30,12 @@ import org.apache.lucene.index.SegmentWr
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.fst.Builder;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.FST.INPUT_TYPE;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
@@ -50,7 +56,6 @@ class Lucene41SimpleDocValuesConsumer ex
static final byte FST = 2;
final IndexOutput data, meta;
- final int maxDoc;
Lucene41SimpleDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
boolean success = false;
@@ -61,7 +66,6 @@ class Lucene41SimpleDocValuesConsumer ex
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
meta = state.directory.createOutput(metaName, state.context);
CodecUtil.writeHeader(meta, metaCodec, VERSION_CURRENT);
- maxDoc = state.segmentInfo.getDocCount();
success = true;
} finally {
if (!success) {
@@ -217,7 +221,24 @@ class Lucene41SimpleDocValuesConsumer ex
@Override
public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
- throw new AssertionError();
+ // write the ordinals as numerics
+ addNumericField(field, docToOrd);
+
+ // write the values as FST
+ meta.writeVInt(field.number);
+ meta.writeByte(FST);
+ meta.writeLong(data.getFilePointer());
+ PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+ Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs);
+ IntsRef scratch = new IntsRef();
+ long ord = 0;
+ for (BytesRef v : values) {
+ builder.add(Util.toIntsRef(v, scratch), ord);
+ ord++;
+ }
+ FST<Long> fst = builder.finish();
+ fst.save(data);
+ meta.writeVInt((int)ord);
}
// nocommit: can/should we make override merge + make it smarter to pull the values
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesProducer.java?rev=1433250&r1=1433249&r2=1433250&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesProducer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesProducer.java Tue Jan 15 00:51:19 2013
@@ -24,6 +24,7 @@ import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.SimpleDVProducer;
import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
@@ -33,6 +34,12 @@ import org.apache.lucene.index.SortedDoc
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.FST.Arc;
+import org.apache.lucene.util.fst.FST.BytesReader;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;
class Lucene41SimpleDocValuesProducer extends SimpleDVProducer {
@@ -50,6 +57,9 @@ class Lucene41SimpleDocValuesProducer ex
// else in this map.
private final Map<Integer,BinaryDocValues> binaryInstances =
new HashMap<Integer,BinaryDocValues>();
+
+ private final Map<Integer,FST<Long>> fstInstances =
+ new HashMap<Integer,FST<Long>>();
Lucene41SimpleDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
@@ -96,6 +106,13 @@ class Lucene41SimpleDocValuesProducer ex
entry.minLength = meta.readVInt();
entry.maxLength = meta.readVInt();
binaries.put(fieldNumber, entry);
+ } else if (fieldType == Lucene41SimpleDocValuesConsumer.FST) {
+ FSTEntry entry = new FSTEntry();
+ entry.offset = meta.readLong();
+ entry.numOrds = meta.readVInt();
+ fsts.put(fieldNumber, entry);
+ } else {
+ throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
}
fieldNumber = meta.readVInt();
}
@@ -113,7 +130,6 @@ class Lucene41SimpleDocValuesProducer ex
private NumericDocValues loadNumeric(FieldInfo field) throws IOException {
NumericEntry entry = numerics.get(field.number);
- final IndexInput data = this.data.clone();
data.seek(entry.offset);
if (entry.tableized) {
int size = data.readVInt();
@@ -154,7 +170,6 @@ class Lucene41SimpleDocValuesProducer ex
private BinaryDocValues loadBinary(FieldInfo field) throws IOException {
BinaryEntry entry = binaries.get(field.number);
- final IndexInput data = this.data.clone();
data.seek(entry.offset);
assert entry.numBytes < Integer.MAX_VALUE; // nocommit
final byte[] bytes = new byte[(int)entry.numBytes];
@@ -184,13 +199,51 @@ class Lucene41SimpleDocValuesProducer ex
}
}
-
-
@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
- throw new AssertionError();
- }
+ final FSTEntry entry = fsts.get(field.number);
+ FST<Long> instance;
+ synchronized(this) {
+ instance = fstInstances.get(field.number);
+ if (instance == null) {
+ data.seek(entry.offset);
+ instance = new FST<Long>(data, PositiveIntOutputs.getSingleton(true));
+ fstInstances.put(field.number, instance);
+ }
+ }
+ final NumericDocValues docToOrd = getNumeric(field);
+ final FST<Long> fst = instance;
+
+ // per-thread resources
+ final BytesReader in = fst.getBytesReader(0);
+ final Arc<Long> firstArc = new Arc<Long>();
+ final Arc<Long> scratchArc = new Arc<Long>();
+ final IntsRef scratchInts = new IntsRef();
+
+ return new SortedDocValues() {
+ @Override
+ public int getOrd(int docID) {
+ return (int) docToOrd.get(docID);
+ }
+
+ @Override
+ public void lookupOrd(int ord, BytesRef result) {
+ try {
+ in.setPosition(0);
+ fst.getFirstArc(firstArc);
+ Util.toBytesRef(Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts), result);
+ } catch (IOException bogus) {
+ throw new RuntimeException(bogus);
+ }
+ }
+ @Override
+ public int getValueCount() {
+ return entry.numOrds;
+ }
+ };
+ }
+
@Override
public void close() throws IOException {
data.close();
@@ -212,5 +265,4 @@ class Lucene41SimpleDocValuesProducer ex
long offset;
int numOrds;
}
-
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Util.java?rev=1433250&r1=1433249&r2=1433250&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Util.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Util.java Tue Jan 15 00:51:19 2013
@@ -109,7 +109,13 @@ public final class Util {
FST.Arc<Long> scratchArc = new FST.Arc<Long>();
final IntsRef result = new IntsRef();
-
+
+ return getByOutput(fst, targetOutput, in, arc, scratchArc, result);
+ }
+
+ /** Expert: like {@link Util#getByOutput(FST, long)} except reusing the caller-supplied BytesReader, arcs and IntsRef result */
+ // nocommit
+ public static IntsRef getByOutput(FST<Long> fst, long targetOutput, BytesReader in, Arc<Long> arc, Arc<Long> scratchArc, IntsRef result) throws IOException {
long output = arc.output;
int upto = 0;