You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/02/09 09:24:20 UTC
svn commit: r1068786 - in /lucene/dev/branches/docvalues/lucene/src:
java/org/apache/lucene/document/ java/org/apache/lucene/index/
java/org/apache/lucene/index/codecs/
java/org/apache/lucene/index/codecs/docvalues/
java/org/apache/lucene/index/values/...
Author: simonw
Date: Wed Feb 9 08:24:19 2011
New Revision: 1068786
URL: http://svn.apache.org/viewvc?rev=1068786&view=rev
Log:
LUCENE-2186: made class and var naming consistent, added javadoc and specialized codec provider
Added:
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecProvider.java (with props)
Modified:
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DocValuesField.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Bytes.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/DocValues.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Floats.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Type.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Writer.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/FieldComparator.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/SortField.java
lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DocValuesField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DocValuesField.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DocValuesField.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DocValuesField.java Wed Feb 9 08:24:19 2011
@@ -99,36 +99,36 @@ public class DocValuesField extends Abst
/**
* Sets the given <code>long</code> value and sets the field's {@link Type} to
- * {@link Type#PACKED_INTS} unless already set. If you want to change the
+ * {@link Type#INTS} unless already set. If you want to change the
* default type use {@link #setType(Type)}.
*/
public void setInt(long value) {
if (type == null) {
- type = Type.PACKED_INTS;
+ type = Type.INTS;
}
longValue = value;
}
/**
* Sets the given <code>float</code> value and sets the field's {@link Type}
- * to {@link Type#SIMPLE_FLOAT_4BYTE} unless already set. If you want to
+ * to {@link Type#FLOAT_32} unless already set. If you want to
* change the type use {@link #setType(Type)}.
*/
public void setFloat(float value) {
if (type == null) {
- type = Type.SIMPLE_FLOAT_4BYTE;
+ type = Type.FLOAT_32;
}
doubleValue = value;
}
/**
* Sets the given <code>double</code> value and sets the field's {@link Type}
- * to {@link Type#SIMPLE_FLOAT_8BYTE} unless already set. If you want to
+ * to {@link Type#FLOAT_64} unless already set. If you want to
* change the default type use {@link #setType(Type)}.
*/
public void setFloat(double value) {
if (type == null) {
- type = Type.SIMPLE_FLOAT_8BYTE;
+ type = Type.FLOAT_64;
}
doubleValue = value;
}
@@ -269,13 +269,13 @@ public class DocValuesField extends Abst
field.stringValue());
valField.setBytes(ref, type);
break;
- case PACKED_INTS:
+ case INTS:
valField.setInt(Long.parseLong(field.stringValue()));
break;
- case SIMPLE_FLOAT_4BYTE:
+ case FLOAT_32:
valField.setFloat(Float.parseFloat(field.stringValue()));
break;
- case SIMPLE_FLOAT_8BYTE:
+ case FLOAT_64:
valField.setFloat(Double.parseDouble(field.stringValue()));
break;
default:
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java Wed Feb 9 08:24:19 2011
@@ -54,7 +54,7 @@ final class DocFieldProcessor extends Do
fieldInfo.setDocValues(values.type());
if(fieldsConsumer == null) {
- /* nocommit -- this is a hack and only works since DocValuesCodec supports initializing the FieldsConsumer twice.
+ /* TODO (close to no commit) -- this is a hack and only works since DocValuesCodec supports initializing the FieldsConsumer twice.
* we need to find a way that allows us to obtain a FieldsConsumer per DocumentsWriter. Currently some codecs rely on
* the SegmentsWriteState passed in right at the moment when the segment is flushed (doccount etc) but we need the consumer earlier
* to support docvalues and later on stored fields too.
@@ -64,7 +64,6 @@ final class DocFieldProcessor extends Do
}
valuesConsumer = fieldsConsumer.addValuesField(fieldInfo);
docValues.put(name, valuesConsumer);
-
}
return valuesConsumer;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java Wed Feb 9 08:24:19 2011
@@ -23,7 +23,6 @@ import java.util.HashSet;
import java.util.List;
import java.io.IOException;
-import org.apache.lucene.document.AbstractField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer;
@@ -253,11 +252,7 @@ final class DocFieldProcessorPerThread e
final DocFieldProcessorPerField perField = fields[i];
final Fieldable fieldable = perField.fields[0];
perField.consumer.processFields(perField.fields, perField.fieldCount);
-
- if (!(fieldable instanceof AbstractField)) {
- continue;
- }
- final PerDocFieldValues docValues = ((AbstractField)fieldable).getDocValues();
+ final PerDocFieldValues docValues = fieldable.getDocValues();
if (docValues == null) {
continue;
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java Wed Feb 9 08:24:19 2011
@@ -326,13 +326,13 @@ public final class FieldInfos {
b = 0;
} else {
switch(fi.docValues) {
- case PACKED_INTS:
+ case INTS:
b = 1;
break;
- case SIMPLE_FLOAT_4BYTE:
+ case FLOAT_32:
b = 2;
break;
- case SIMPLE_FLOAT_8BYTE:
+ case FLOAT_64:
b = 3;
break;
case BYTES_FIXED_STRAIGHT:
@@ -393,13 +393,13 @@ public final class FieldInfos {
docValuesType = null;
break;
case 1:
- docValuesType = Type.PACKED_INTS;
+ docValuesType = Type.INTS;
break;
case 2:
- docValuesType = Type.SIMPLE_FLOAT_4BYTE;
+ docValuesType = Type.FLOAT_32;
break;
case 3:
- docValuesType = Type.SIMPLE_FLOAT_8BYTE;
+ docValuesType = Type.FLOAT_64;
break;
case 4:
docValuesType = Type.BYTES_FIXED_STRAIGHT;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java Wed Feb 9 08:24:19 2011
@@ -22,6 +22,7 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
+import java.util.Map.Entry;
import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
import org.apache.lucene.index.codecs.pulsing.PulsingCodec;
@@ -163,6 +164,26 @@ public class CodecProvider {
public synchronized void setDefaultFieldCodec(String codec) {
defaultFieldCodec = codec;
}
+
+ /**
+ * Registers all codecs from the given provider including the field to codec
+ * mapping and the default field codec.
+ * <p>
+ * NOTE: This method will pass any codec from the given provider to
+ * {@link #register(Codec)} and set field codecs via
+ * {@link #setFieldCodec(String, String)}.
+ */
+ public void copyFrom(CodecProvider other) {
+ final Collection<Codec> values = other.codecs.values();
+ for (Codec codec : values) {
+ register(codec);
+ }
+ final Set<Entry<String, String>> entrySet = other.perFieldMap.entrySet();
+ for (Entry<String, String> entry : entrySet) {
+ setFieldCodec(entry.getKey(), entry.getValue());
+ }
+ setDefaultFieldCodec(other.getDefaultFieldCodec());
+ }
}
class DefaultCodecProvider extends CodecProvider {
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java Wed Feb 9 08:24:19 2011
@@ -49,7 +49,7 @@ public class DocValuesCodec extends Code
private final Comparator<BytesRef> comparator;
public DocValuesCodec(Codec other, Comparator<BytesRef> comparator) {
- this.name = "docvalues_" + other.name;
+ this.name = other.name;
this.other = other;
this.comparator = comparator;
}
Added: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecProvider.java?rev=1068786&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecProvider.java (added)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecProvider.java Wed Feb 9 08:24:19 2011
@@ -0,0 +1,34 @@
+package org.apache.lucene.index.codecs.docvalues;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.CodecProvider;
+
+/**
+ * Simple Codec provider that wraps all registered codecs into a {@link DocValuesCodec}
+ */
+public class DocValuesCodecProvider extends CodecProvider {
+
+ @Override
+ public synchronized void register(Codec codec) {
+ if (codec instanceof DocValuesCodec) {
+ super.register(codec);
+ } else {
+ super.register(new DocValuesCodec(codec));
+ }
+ }
+}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java Wed Feb 9 08:24:19 2011
@@ -111,11 +111,11 @@ public abstract class DocValuesProducerB
protected DocValues loadDocValues(int docCount, Directory dir, String id,
Type type) throws IOException {
switch (type) {
- case PACKED_INTS:
+ case INTS:
return Ints.getValues(dir, id, false);
- case SIMPLE_FLOAT_4BYTE:
+ case FLOAT_32:
return Floats.getValues(dir, id, docCount);
- case SIMPLE_FLOAT_8BYTE:
+ case FLOAT_64:
return Floats.getValues(dir, id, docCount);
case BYTES_FIXED_STRAIGHT:
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount);
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Bytes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Bytes.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Bytes.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Bytes.java Wed Feb 9 08:24:19 2011
@@ -54,14 +54,16 @@ import org.apache.lucene.util.PagedBytes
* @lucene.experimental
*/
public final class Bytes {
- //TODO - add bulk copy where possible
- private Bytes() { /* don't instantiate! */ }
+ // TODO - add bulk copy where possible
+ private Bytes() { /* don't instantiate! */
+ }
/**
* Defines the {@link Writer}s store mode. The writer will either store the
* bytes sequentially ({@link #STRAIGHT}, dereferenced ({@link #DEREF}) or
* sorted ({@link #SORTED})
*
+ * @lucene.experimental
*/
public static enum Mode {
/**
@@ -180,10 +182,10 @@ public final class Bytes {
// TODO open up this API?
static abstract class BytesBaseSource extends Source {
+ private final PagedBytes pagedBytes;
protected final IndexInput datIn;
protected final IndexInput idxIn;
protected final static int PAGED_BYTES_BITS = 15;
- private final PagedBytes pagedBytes;
protected final PagedBytes.Reader data;
protected final long totalLengthInBytes;
@@ -204,11 +206,13 @@ public final class Bytes {
data.close(); // close data
} finally {
try {
- if (datIn != null)
+ if (datIn != null) {
datIn.close();
+ }
} finally {
- if (idxIn != null) // if straight - no index needed
+ if (idxIn != null) {// if straight - no index needed
idxIn.close();
+ }
}
}
}
@@ -269,18 +273,27 @@ public final class Bytes {
return ord == 0 ? null : deref(--ord, bytesRef);
}
- public void close() throws IOException {
+ protected void closeIndexInput() throws IOException {
try {
- if (datIn != null)
+ if (datIn != null) {
datIn.close();
+ }
} finally {
- if (idxIn != null) // if straight
+ if (idxIn != null) {// if straight
idxIn.close();
+ }
}
}
+ /**
+ * Returns the largest doc id + 1 in this doc values source
+ */
protected abstract int maxDoc();
+ /**
+ * Copies the value for the given ord to the given {@link BytesRef} and
+ * returns it.
+ */
protected abstract BytesRef deref(int ord, BytesRef bytesRef);
protected LookupResult binarySearch(BytesRef b, BytesRef bytesRef, int low,
@@ -328,7 +341,6 @@ public final class Bytes {
// TODO: open up this API?!
static abstract class BytesWriterBase extends Writer {
-
private final Directory dir;
private final String id;
protected IndexOutput idxOut;
@@ -347,10 +359,13 @@ public final class Bytes {
this.codecName = codecName;
this.version = version;
this.pool = pool;
- if (initData)
+ if (initData) {
initDataOut();
- if (initIndex)
+ }
+
+ if (initIndex) {
initIndexOut();
+ }
}
private void initDataOut() throws IOException {
@@ -365,10 +380,6 @@ public final class Bytes {
CodecUtil.writeHeader(idxOut, codecName, version);
}
- public long ramBytesUsed() {
- return bytesUsed.get();
- }
-
/**
* Must be called only with increasing docIDs. It's OK for some docIDs to be
* skipped; they will be filled with 0 bytes.
@@ -448,14 +459,19 @@ public final class Bytes {
} else {
idxIn = null;
}
-
}
+ /**
+ * clones and returns the data {@link IndexInput}
+ */
protected final IndexInput cloneData() {
assert datIn != null;
return (IndexInput) datIn.clone();
}
+ /**
+ * clones and returns the indexing {@link IndexInput}
+ */
protected final IndexInput cloneIndex() {
assert idxIn != null;
return (IndexInput) idxIn.clone();
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/DocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/DocValues.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/DocValues.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/DocValues.java Wed Feb 9 08:24:19 2011
@@ -28,12 +28,19 @@ import org.apache.lucene.util.BytesRef;
/**
* TODO
+ *
* @see FieldsEnum#docValues()
* @see Fields#docValues(String)
* @lucene.experimental
*/
public abstract class DocValues implements Closeable {
-
+ /*
+ * TODO: it might be useful to add another Random Access enum for some
+ * implementations like packed ints and only return such a random access enum
+ * if the impl supports random access. For super large segments it might be
+ * useful or even required in certain environments to have disc based random
+ * access
+ */
public static final DocValues[] EMPTY_ARRAY = new DocValues[0];
private SourceCache cache = new SourceCache.DirectSourceCache();
@@ -141,7 +148,9 @@ public abstract class DocValues implemen
* <p>
* Note: All instances previously obtained from {@link #load()} or
* {@link #loadSorted(Comparator)} will be closed.
- * @throws IllegalArgumentException if the given cache is <code>null</code>
+ *
+ * @throws IllegalArgumentException
+ * if the given cache is <code>null</code>
*
*/
public void setCache(SourceCache cache) {
@@ -162,6 +171,7 @@ public abstract class DocValues implemen
* {@link Source} defines 3 {@link Type} //TODO finish this
*/
public static abstract class Source {
+ // TODO we might need a close method here to null out the internally used arrays?!
protected final MissingValue missingValue = new MissingValue();
/**
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java Wed Feb 9 08:24:19 2011
@@ -37,7 +37,7 @@ import org.apache.lucene.util.LongsRef;
* <p>
* Note: Only the reference for the enum's type are initialized to non
* <code>null</code> ie. {@link #getInt()} will always return <code>null</code>
- * if the enum's Type is {@link Type#SIMPLE_FLOAT_4BYTE}.
+ * if the enum's Type is {@link Type#FLOAT_32}.
*
* @lucene.experimental
*/
@@ -71,11 +71,11 @@ public abstract class DocValuesEnum exte
case BYTES_VAR_STRAIGHT:
bytesRef = new BytesRef();
break;
- case PACKED_INTS:
+ case INTS:
intsRef = new LongsRef(1);
break;
- case SIMPLE_FLOAT_4BYTE:
- case SIMPLE_FLOAT_8BYTE:
+ case FLOAT_32:
+ case FLOAT_64:
floatsRef = new FloatsRef(1);
break;
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java Wed Feb 9 08:24:19 2011
@@ -194,6 +194,7 @@ class FixedSortedBytesImpl {
this.size = size;
this.numValue = numValues;
index = PackedInts.getReader(idxIn);
+ closeIndexInput();
}
@Override
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Floats.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Floats.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Floats.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Floats.java Wed Feb 9 08:24:19 2011
@@ -297,7 +297,7 @@ public class Floats {
public DocValuesEnum getEnum(AttributeSource attrSource)
throws IOException {
final MissingValue missing = getMissing();
- return new SourceEnum(attrSource, Type.SIMPLE_FLOAT_4BYTE, this, maxDoc) {
+ return new SourceEnum(attrSource, Type.FLOAT_32, this, maxDoc) {
@Override
public int advance(int target) throws IOException {
if (target >= numDocs)
@@ -315,7 +315,7 @@ public class Floats {
@Override
public Type type() {
- return Type.SIMPLE_FLOAT_4BYTE;
+ return Type.FLOAT_32;
}
}
@@ -355,7 +355,7 @@ public class Floats {
@Override
public Type type() {
- return Type.SIMPLE_FLOAT_8BYTE;
+ return Type.FLOAT_64;
}
}
@@ -377,8 +377,8 @@ public class Floats {
@Override
public Type type() {
- return precisionBytes == 4 ? Type.SIMPLE_FLOAT_4BYTE
- : Type.SIMPLE_FLOAT_8BYTE;
+ return precisionBytes == 4 ? Type.FLOAT_32
+ : Type.FLOAT_64;
}
}
@@ -386,7 +386,7 @@ public class Floats {
Floats4Enum(AttributeSource source, IndexInput dataIn, int maxDoc)
throws IOException {
- super(source, dataIn, 4, maxDoc, Type.SIMPLE_FLOAT_4BYTE);
+ super(source, dataIn, 4, maxDoc, Type.FLOAT_32);
}
@Override
@@ -422,7 +422,7 @@ public class Floats {
Floats8EnumImpl(AttributeSource source, IndexInput dataIn, int maxDoc)
throws IOException {
- super(source, dataIn, 8, maxDoc, Type.SIMPLE_FLOAT_8BYTE);
+ super(source, dataIn, 8, maxDoc, Type.FLOAT_64);
}
@Override
@@ -464,8 +464,8 @@ public class Floats {
FloatsEnumImpl(AttributeSource source, IndexInput dataIn, int precision,
int maxDoc, Type type) throws IOException {
- super(source, precision == 4 ? Type.SIMPLE_FLOAT_4BYTE
- : Type.SIMPLE_FLOAT_8BYTE);
+ super(source, precision == 4 ? Type.FLOAT_32
+ : Type.FLOAT_64);
this.dataIn = dataIn;
this.precision = precision;
this.maxDoc = maxDoc;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java Wed Feb 9 08:24:19 2011
@@ -111,7 +111,7 @@ class PackedIntsImpl {
PackedInts.bitsRequired(maxValue - minValue));
final int firstDoc = defaultValues.nextSetBit(0);
lastDocId++;
- if(firstDoc != -1) {
+ if (firstDoc != -1) {
for (int i = 0; i < firstDoc; i++) {
w.add(defaultValue); // fill with defaults until first bit set
}
@@ -223,7 +223,7 @@ class PackedIntsImpl {
@Override
public Type type() {
- return Type.PACKED_INTS;
+ return Type.INTS;
}
}
@@ -240,7 +240,7 @@ class PackedIntsImpl {
@Override
public Type type() {
- return Type.PACKED_INTS;
+ return Type.INTS;
}
}
@@ -255,7 +255,7 @@ class PackedIntsImpl {
private IntsEnumImpl(AttributeSource source, IndexInput dataIn)
throws IOException {
- super(source, Type.PACKED_INTS);
+ super(source, Type.INTS);
intsRef.offset = 0;
this.dataIn = dataIn;
dataIn.seek(CodecUtil.headerLength(CODEC_NAME));
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Type.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Type.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Type.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Type.java Wed Feb 9 08:24:19 2011
@@ -1,4 +1,5 @@
package org.apache.lucene.index.values;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,30 +17,38 @@ package org.apache.lucene.index.values;
* limitations under the License.
*/
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.values.DocValues.SortedSource;
/**
* {@link Type} specifies the type of the {@link DocValues} for a certain field.
- * A {@link Type} can specify the actual data type for a field, used compression
- * schemes and high-level data-structures.
+ * A {@link Type} only defines the data type for a field while the actual
+ * implementation used to encode and decode the values depends on the field's
+ * {@link Codec}. It is up to the {@link Codec} implementing
+ * {@link FieldsConsumer#addValuesField(org.apache.lucene.index.FieldInfo)} and
+ * using different low-level implementations to write the stored values for a
+ * field.
*
* @lucene.experimental
*/
public enum Type {
-
+ /*
+ * TODO: Add INT_32 INT_64 INT_16 & INT_8?!
+ */
/**
- * Integral value is stored as packed ints. The bit precision is fixed across
- * the segment, and determined by the min/max values in the field.
+ * Integer values.
*/
- PACKED_INTS,
+ INTS,
+
/**
- * 32 bit floating point value stored without modification or compression.
+ * 32 bit floating point values.
*/
- SIMPLE_FLOAT_4BYTE,
+ FLOAT_32,
/**
- * 64 bit floating point value stored without modification or compression.
+ * 64 bit floating point values.
*/
- SIMPLE_FLOAT_8BYTE,
+ FLOAT_64,
// TODO(simonw): -- shouldn't lucene decide/detect straight vs
// deref, as well fixed vs var?
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java Wed Feb 9 08:24:19 2011
@@ -98,6 +98,15 @@ class VarDerefBytesImpl {
}
+ /*
+ * TODO: if impls like this are merged we are bound to the amount of memory we
+ * can store into a BytesRefHash and therefore how much memory a ByteBlockPool
+ * can address. This is currently limited to 2GB. While we could extend that
+ * and use 64bit for addressing this still limits us to the existing main
+ * memory as all distinct bytes will be loaded up into main memory. We could
+ * move the byte[] writing to #finish(int) and store the bytes in sorted
+ * order and merge them in a streamed fashion.
+ */
static class Writer extends BytesWriterBase {
private int[] docToAddress;
private int address = 1;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java Wed Feb 9 08:24:19 2011
@@ -186,6 +186,7 @@ class VarSortedBytesImpl {
docToOrdIndex = PackedInts.getReader(idxIn);
ordToOffsetIndex = PackedInts.getReader(idxIn);
valueCount = ordToOffsetIndex.size();
+ closeIndexInput();
}
@Override
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Writer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Writer.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Writer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Writer.java Wed Feb 9 08:24:19 2011
@@ -197,11 +197,11 @@ public abstract class Writer extends Doc
comp = BytesRef.getUTF8SortedAsUnicodeComparator();
}
switch (type) {
- case PACKED_INTS:
+ case INTS:
return Ints.getWriter(directory, id, true, bytesUsed);
- case SIMPLE_FLOAT_4BYTE:
+ case FLOAT_32:
return Floats.getWriter(directory, id, 4, bytesUsed);
- case SIMPLE_FLOAT_8BYTE:
+ case FLOAT_64:
return Floats.getWriter(directory, id, 8, bytesUsed);
case BYTES_FIXED_STRAIGHT:
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, true,
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/FieldComparator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/FieldComparator.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/FieldComparator.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/FieldComparator.java Wed Feb 9 08:24:19 2011
@@ -332,13 +332,13 @@ public abstract class FieldComparator {
}
/** Uses float index values to sort by ascending value */
- public static final class FloatIndexValuesComparator extends FieldComparator {
+ public static final class FloatDocValuesComparator extends FieldComparator {
private final double[] values;
private Source currentReaderValues;
private final String field;
private double bottom;
- FloatIndexValuesComparator(int numHits, String field) {
+ FloatDocValuesComparator(int numHits, String field) {
values = new double[numHits];
this.field = field;
}
@@ -599,13 +599,13 @@ public abstract class FieldComparator {
}
/** Loads int index values and sorts by ascending value. */
- public static final class IntIndexValuesComparator extends FieldComparator {
+ public static final class IntDocValuesComparator extends FieldComparator {
private final long[] values;
private Source currentReaderValues;
private final String field;
private long bottom;
- IntIndexValuesComparator(int numHits, String field) {
+ IntDocValuesComparator(int numHits, String field) {
values = new long[numHits];
this.field = field;
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/SortField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/SortField.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/SortField.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/search/SortField.java Wed Feb 9 08:24:19 2011
@@ -493,14 +493,14 @@ implements Serializable {
case SortField.INT:
if (useIndexValues) {
- return new FieldComparator.IntIndexValuesComparator(numHits, field);
+ return new FieldComparator.IntDocValuesComparator(numHits, field);
} else {
return new FieldComparator.IntComparator(numHits, (IntValuesCreator)creator, (Integer) missingValue);
}
case SortField.FLOAT:
if (useIndexValues) {
- return new FieldComparator.FloatIndexValuesComparator(numHits, field);
+ return new FieldComparator.FloatDocValuesComparator(numHits, field);
} else {
return new FieldComparator.FloatComparator(numHits, (FloatValuesCreator) creator, (Float) missingValue);
}
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java?rev=1068786&r1=1068785&r2=1068786&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java Wed Feb 9 08:24:19 2011
@@ -43,7 +43,7 @@ import org.apache.lucene.index.MergePoli
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.codecs.CodecProvider;
-import org.apache.lucene.index.codecs.docvalues.DocValuesCodec;
+import org.apache.lucene.index.codecs.docvalues.DocValuesCodecProvider;
import org.apache.lucene.index.values.DocValues.MissingValue;
import org.apache.lucene.index.values.DocValues.Source;
import org.apache.lucene.queryParser.ParseException;
@@ -79,19 +79,13 @@ public class TestDocValuesIndexing exten
*
*/
- private DocValuesCodec docValuesCodec;
private CodecProvider provider;
@Before
public void setUp() throws Exception {
super.setUp();
- String defaultFieldCodec = CodecProvider.getDefault()
- .getDefaultFieldCodec();
- provider = new CodecProvider();
- docValuesCodec = new DocValuesCodec(CodecProvider.getDefault().lookup(
- defaultFieldCodec));
- provider.register(docValuesCodec);
- provider.setDefaultFieldCodec(docValuesCodec.name);
+ provider = new DocValuesCodecProvider();
+ provider.copyFrom(CodecProvider.getDefault());
}
/*
@@ -278,7 +272,7 @@ public class TestDocValuesIndexing exten
final int numRemainingValues = (int) (numValues - deleted.cardinality());
final int base = r.numDocs() - numRemainingValues;
switch (val) {
- case PACKED_INTS: {
+ case INTS: {
DocValues intsReader = getDocValues(r, val.name());
assertNotNull(intsReader);
@@ -309,8 +303,8 @@ public class TestDocValuesIndexing exten
}
}
break;
- case SIMPLE_FLOAT_4BYTE:
- case SIMPLE_FLOAT_8BYTE: {
+ case FLOAT_32:
+ case FLOAT_64: {
DocValues floatReader = getDocValues(r, val.name());
assertNotNull(floatReader);
Source floats = getSource(floatReader);
@@ -515,8 +509,8 @@ public class TestDocValuesIndexing exten
Type.BYTES_FIXED_SORTED, Type.BYTES_FIXED_STRAIGHT, Type.BYTES_VAR_DEREF,
Type.BYTES_VAR_SORTED, Type.BYTES_VAR_STRAIGHT);
- private static EnumSet<Type> NUMERICS = EnumSet.of(Type.PACKED_INTS,
- Type.SIMPLE_FLOAT_4BYTE, Type.SIMPLE_FLOAT_8BYTE);
+ private static EnumSet<Type> NUMERICS = EnumSet.of(Type.INTS,
+ Type.FLOAT_32, Type.FLOAT_64);
private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED,
Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS,
@@ -547,11 +541,11 @@ public class TestDocValuesIndexing exten
for (int i = 0; i < numValues; i++) {
if (isNumeric) {
switch (value) {
- case PACKED_INTS:
+ case INTS:
valField.setInt(i);
break;
- case SIMPLE_FLOAT_4BYTE:
- case SIMPLE_FLOAT_8BYTE:
+ case FLOAT_32:
+ case FLOAT_64:
valField.setFloat(2.0f * i);
break;
default: