You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/01/08 00:41:09 UTC
svn commit: r1430090 - in /lucene/dev/branches/lucene4547/lucene:
codecs/src/java/org/apache/lucene/codecs/memory/
codecs/src/java/org/apache/lucene/codecs/simpletext/
core/src/java/org/apache/lucene/codecs/
core/src/java/org/apache/lucene/codecs/lucen...
Author: mikemccand
Date: Mon Jan 7 23:41:09 2013
New Revision: 1430090
URL: http://svn.apache.org/viewvc?rev=1430090&view=rev
Log:
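Cut over to Iterable for SortedDV (SortedDocValues): addSortedField now takes Iterable<BytesRef> values and Iterable<Number> docToOrd instead of returning a SortedDocValuesConsumer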
Removed:
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/
Modified:
lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesFormat.java
lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesFormat.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedBytesDVWriter.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java
Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesFormat.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesFormat.java Mon Jan 7 23:41:09 2013
@@ -123,8 +123,6 @@ public class MemoryDocValuesFormat exten
public SortedDocValues getSorted(FieldInfo field) throws IOException {
SortedDocValues valuesIn = producer.getSorted(field);
final int maxDoc = valuesIn.size();
- final int maxLength = valuesIn.maxLength();
- final boolean fixedLength = valuesIn.isFixedLength();
final int valueCount = valuesIn.getValueCount();
// nocommit used packed ints and so on
@@ -163,16 +161,6 @@ public class MemoryDocValuesFormat exten
public int size() {
return maxDoc;
}
-
- @Override
- public boolean isFixedLength() {
- return fixedLength;
- }
-
- @Override
- public int maxLength() {
- return maxLength;
- }
};
}
Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java Mon Jan 7 23:41:09 2013
@@ -32,7 +32,6 @@ import java.util.Set;
import org.apache.lucene.codecs.SimpleDVConsumer;
import org.apache.lucene.codecs.SimpleDVProducer;
import org.apache.lucene.codecs.SimpleDocValuesFormat;
-import org.apache.lucene.codecs.SortedDocValuesConsumer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
@@ -268,21 +267,24 @@ public class SimpleTextSimpleDocValuesFo
}
@Override
- public SortedDocValuesConsumer addSortedField(FieldInfo field, final int valueCount, boolean fixedLength, final int maxLength) throws IOException {
+ public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
assert fieldSeen(field.name);
assert DocValues.isSortedBytes(field.getDocValuesType());
assert !isNorms;
writeFieldEntry(field);
+
+ int valueCount = 0;
+ int maxLength = -1;
+ for(BytesRef value : values) {
+ maxLength = Math.max(maxLength, value.length);
+ valueCount++;
+ }
+
// write numValues
SimpleTextUtil.write(data, NUMVALUES);
SimpleTextUtil.write(data, Integer.toString(valueCount), scratch);
SimpleTextUtil.writeNewline(data);
- // write fixedlength
- SimpleTextUtil.write(data, FIXEDLENGTH);
- SimpleTextUtil.write(data, Boolean.toString(fixedLength), scratch);
- SimpleTextUtil.writeNewline(data);
-
// write maxLength
SimpleTextUtil.write(data, MAXLENGTH);
SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
@@ -312,40 +314,34 @@ public class SimpleTextSimpleDocValuesFo
SimpleTextUtil.writeNewline(data);
final DecimalFormat ordEncoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));
- return new SortedDocValuesConsumer() {
+ // for asserts:
+ int valuesSeen = 0;
- // for asserts:
- private int valuesSeen;
-
- @Override
- public void addValue(BytesRef value) throws IOException {
- // write length
- SimpleTextUtil.write(data, LENGTH);
- SimpleTextUtil.write(data, encoder.format(value.length), scratch);
- SimpleTextUtil.writeNewline(data);
+ for(BytesRef value : values) {
+ // write length
+ SimpleTextUtil.write(data, LENGTH);
+ SimpleTextUtil.write(data, encoder.format(value.length), scratch);
+ SimpleTextUtil.writeNewline(data);
- // write bytes -- don't use SimpleText.write
- // because it escapes:
- data.writeBytes(value.bytes, value.offset, value.length);
-
- // pad to fit
- for (int i = value.length; i < maxLength; i++) {
- data.writeByte((byte)' ');
- }
- SimpleTextUtil.writeNewline(data);
- valuesSeen++;
- assert valuesSeen <= valueCount;
- }
+ // write bytes -- don't use SimpleText.write
+ // because it escapes:
+ data.writeBytes(value.bytes, value.offset, value.length);
- @Override
- public void addDoc(int ord) throws IOException {
- SimpleTextUtil.write(data, ordEncoder.format(ord), scratch);
- SimpleTextUtil.writeNewline(data);
+ // pad to fit
+ for (int i = value.length; i < maxLength; i++) {
+ data.writeByte((byte)' ');
}
+ SimpleTextUtil.writeNewline(data);
+ valuesSeen++;
+ assert valuesSeen <= valueCount;
+ }
- @Override
- public void finish() throws IOException {}
- };
+ assert valuesSeen == valueCount;
+
+ for(Number ord : docToOrd) {
+ SimpleTextUtil.write(data, ordEncoder.format(ord.intValue()), scratch);
+ SimpleTextUtil.writeNewline(data);
+ }
}
/** write the header for this field */
@@ -450,9 +446,6 @@ public class SimpleTextSimpleDocValuesFo
assert startsWith(NUMVALUES);
field.numValues = Integer.parseInt(stripPrefix(NUMVALUES));
readLine();
- assert startsWith(FIXEDLENGTH);
- field.fixedLength = Boolean.parseBoolean(stripPrefix(FIXEDLENGTH));
- readLine();
assert startsWith(MAXLENGTH);
field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
readLine();
@@ -635,16 +628,6 @@ public class SimpleTextSimpleDocValuesFo
public int size() {
return maxDoc;
}
-
- @Override
- public boolean isFixedLength() {
- return field.fixedLength;
- }
-
- @Override
- public int maxLength() {
- return field.maxLength;
- }
};
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java Mon Jan 7 23:41:09 2013
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
@@ -31,6 +32,8 @@ import org.apache.lucene.index.NumericDo
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.PriorityQueue;
// prototype streaming DV api
public abstract class SimpleDVConsumer implements Closeable {
@@ -42,8 +45,7 @@ public abstract class SimpleDVConsumer i
public abstract void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException;
- // nocommit: figure out whats fair here.
- public abstract SortedDocValuesConsumer addSortedField(FieldInfo field, int valueCount, boolean fixedLength, int maxLength) throws IOException;
+ public abstract void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException;
// dead simple impl: codec can optimize
public void mergeNumericField(FieldInfo fieldInfo, final MergeState mergeState, final List<NumericDocValues> toMerge) throws IOException {
@@ -183,10 +185,238 @@ public abstract class SimpleDVConsumer i
});
}
- public void mergeSortedField(FieldInfo fieldInfo, MergeState mergeState, List<SortedDocValues> toMerge) throws IOException {
- SortedDocValuesConsumer.Merger merger = new SortedDocValuesConsumer.Merger();
+ public static class SortedBytesMerger {
+
+ public int numMergedTerms;
+
+ final List<BytesRef> mergedTerms = new ArrayList<BytesRef>();
+ final List<SegmentState> segStates = new ArrayList<SegmentState>();
+
+ private static class SegmentState {
+ AtomicReader reader;
+ FixedBitSet liveTerms;
+ int ord = -1;
+ SortedDocValues values;
+ BytesRef scratch = new BytesRef();
+
+ // nocommit can we factor out the compressed fields
+ // compression? ie we have a good idea "roughly" what
+ // the ord should be (linear projection) so we only
+ // need to encode the delta from that ...:
+ int[] segOrdToMergedOrd;
+
+ public BytesRef nextTerm() {
+ while (ord < values.getValueCount()-1) {
+ ord++;
+ if (liveTerms == null || liveTerms.get(ord)) {
+ values.lookupOrd(ord, scratch);
+ return scratch;
+ } else {
+ // Skip "deleted" terms (ie, terms that were not
+ // referenced by any live docs):
+ values.lookupOrd(ord, scratch);
+ }
+ }
+
+ return null;
+ }
+ }
+
+ private static class TermMergeQueue extends PriorityQueue<SegmentState> {
+ public TermMergeQueue(int maxSize) {
+ super(maxSize);
+ }
+
+ @Override
+ protected boolean lessThan(SegmentState a, SegmentState b) {
+ return a.scratch.compareTo(b.scratch) <= 0;
+ }
+ }
+
+ public void merge(MergeState mergeState, List<SortedDocValues> toMerge) throws IOException {
+
+ // First pass: mark "live" terms
+ for (int readerIDX=0;readerIDX<toMerge.size();readerIDX++) {
+ AtomicReader reader = mergeState.readers.get(readerIDX);
+ // nocommit what if this is null...? need default source?
+ int maxDoc = reader.maxDoc();
+
+ SegmentState state = new SegmentState();
+ state.reader = reader;
+ state.values = toMerge.get(readerIDX);
+
+ segStates.add(state);
+ assert state.values.getValueCount() < Integer.MAX_VALUE;
+ if (reader.hasDeletions()) {
+ state.liveTerms = new FixedBitSet(state.values.getValueCount());
+ Bits liveDocs = reader.getLiveDocs();
+ for(int docID=0;docID<maxDoc;docID++) {
+ if (liveDocs.get(docID)) {
+ state.liveTerms.set(state.values.getOrd(docID));
+ }
+ }
+ }
+
+ // nocommit we can unload the bits to disk to reduce
+ // transient ram spike...
+ }
+
+ // Second pass: merge only the live terms
+
+ TermMergeQueue q = new TermMergeQueue(segStates.size());
+ for(SegmentState segState : segStates) {
+ if (segState.nextTerm() != null) {
+
+ // nocommit we could defer this to 3rd pass (and
+ // reduce transient RAM spike) but then
+ // we'd spend more effort computing the mapping...:
+ segState.segOrdToMergedOrd = new int[segState.values.getValueCount()];
+ q.add(segState);
+ }
+ }
+
+ BytesRef lastTerm = null;
+ int ord = 0;
+ while (q.size() != 0) {
+ SegmentState top = q.top();
+ if (lastTerm == null || !lastTerm.equals(top.scratch)) {
+ lastTerm = BytesRef.deepCopyOf(top.scratch);
+ // nocommit we could spill this to disk instead of
+ // RAM, and replay on finish...
+ mergedTerms.add(lastTerm);
+ ord++;
+ }
+
+ top.segOrdToMergedOrd[top.ord] = ord-1;
+ if (top.nextTerm() == null) {
+ q.pop();
+ } else {
+ q.updateTop();
+ }
+ }
+
+ numMergedTerms = ord;
+ }
+
+ /*
+ public void finish(SortedDocValuesConsumer consumer) throws IOException {
+
+ // Third pass: write merged result
+ for(BytesRef term : mergedTerms) {
+ consumer.addValue(term);
+ }
+
+ for(SegmentState segState : segStates) {
+ Bits liveDocs = segState.reader.getLiveDocs();
+ int maxDoc = segState.reader.maxDoc();
+ for(int docID=0;docID<maxDoc;docID++) {
+ if (liveDocs == null || liveDocs.get(docID)) {
+ int segOrd = segState.values.getOrd(docID);
+ int mergedOrd = segState.segOrdToMergedOrd[segOrd];
+ consumer.addDoc(mergedOrd);
+ }
+ }
+ }
+ }
+ */
+ }
+
+ public void mergeSortedField(FieldInfo fieldInfo, final MergeState mergeState, List<SortedDocValues> toMerge) throws IOException {
+ final SortedBytesMerger merger = new SortedBytesMerger();
+
+ // Does the heavy lifting to merge sort all "live" ords:
merger.merge(mergeState, toMerge);
- SortedDocValuesConsumer consumer = addSortedField(fieldInfo, merger.numMergedTerms, merger.fixedLength >= 0, merger.maxLength);
- consumer.merge(mergeState, merger);
+
+ addSortedField(fieldInfo,
+
+ // ord -> value
+ new Iterable<BytesRef>() {
+ @Override
+ public Iterator<BytesRef> iterator() {
+ return new Iterator<BytesRef>() {
+ int ordUpto;
+
+ @Override
+ public boolean hasNext() {
+ return ordUpto < merger.mergedTerms.size();
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public BytesRef next() {
+ return merger.mergedTerms.get(ordUpto++);
+ }
+ };
+ }
+ },
+
+ // doc -> ord
+ new Iterable<Number>() {
+ @Override
+ public Iterator<Number> iterator() {
+ return new Iterator<Number>() {
+ int readerUpto = -1;
+ int docIDUpto;
+ int nextValue;
+ SortedBytesMerger.SegmentState currentReader;
+ Bits currentLiveDocs;
+ boolean nextIsSet;
+
+ @Override
+ public boolean hasNext() {
+ return nextIsSet || setNext();
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Number next() {
+ if (!hasNext()) {
+ throw new NoSuchElementException();
+ }
+ assert nextIsSet;
+ nextIsSet = false;
+ // nocommit make a mutable number
+ return nextValue;
+ }
+
+ private boolean setNext() {
+ while (true) {
+ if (readerUpto == merger.segStates.size()) {
+ return false;
+ }
+
+ if (currentReader == null || docIDUpto == currentReader.reader.maxDoc()) {
+ readerUpto++;
+ if (readerUpto < merger.segStates.size()) {
+ currentReader = merger.segStates.get(readerUpto);
+ currentLiveDocs = currentReader.reader.getLiveDocs();
+ }
+ docIDUpto = 0;
+ continue;
+ }
+
+ if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
+ nextIsSet = true;
+ int segOrd = currentReader.values.getOrd(docIDUpto);
+ nextValue = currentReader.segOrdToMergedOrd[segOrd];
+ docIDUpto++;
+ return true;
+ }
+
+ docIDUpto++;
+ }
+ }
+ };
+ }
+ });
+
}
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesFormat.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesFormat.java Mon Jan 7 23:41:09 2013
@@ -25,7 +25,6 @@ import org.apache.lucene.codecs.CodecUti
import org.apache.lucene.codecs.SimpleDVConsumer;
import org.apache.lucene.codecs.SimpleDVProducer;
import org.apache.lucene.codecs.SimpleDocValuesFormat;
-import org.apache.lucene.codecs.SortedDocValuesConsumer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
@@ -128,8 +127,8 @@ public class Lucene41SimpleDocValuesForm
}
@Override
- public SortedDocValuesConsumer addSortedField(FieldInfo field, int valueCount, boolean fixedLength, int maxLength) throws IOException {
- return null;
+ public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
+ // nocommit todo
}
@Override
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java Mon Jan 7 23:41:09 2013
@@ -29,7 +29,6 @@ import org.apache.lucene.codecs.Postings
import org.apache.lucene.codecs.SimpleDVConsumer;
import org.apache.lucene.codecs.SimpleDVProducer;
import org.apache.lucene.codecs.SimpleDocValuesFormat;
-import org.apache.lucene.codecs.SortedDocValuesConsumer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.NumericDocValues;
@@ -111,8 +110,8 @@ public abstract class PerFieldDocValuesF
}
@Override
- public SortedDocValuesConsumer addSortedField(FieldInfo field, int valueCount, boolean fixedLength, int maxLength) throws IOException {
- return getInstance(field).addSortedField(field, valueCount, fixedLength, maxLength);
+ public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
+ getInstance(field).addSortedField(field, values, docToOrd);
}
private SimpleDVConsumer getInstance(FieldInfo field) throws IOException {
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedBytesDVWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedBytesDVWriter.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedBytesDVWriter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedBytesDVWriter.java Mon Jan 7 23:41:09 2013
@@ -18,9 +18,9 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
+import java.util.Iterator;
import org.apache.lucene.codecs.SimpleDVConsumer;
-import org.apache.lucene.codecs.SortedDocValuesConsumer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
@@ -44,11 +44,6 @@ class SortedBytesDVWriter extends DocVal
private static final BytesRef EMPTY = new BytesRef(BytesRef.EMPTY_BYTES);
private static final int DEFAULT_PENDING_SIZE = 16;
- // -2 means not set yet; -1 means length isn't fixed;
- // -otherwise it's the fixed length seen so far:
- int fixedLength = -2;
- int maxLength;
-
public SortedBytesDVWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
this.fieldInfo = fieldInfo;
this.iwBytesUsed = iwBytesUsed;
@@ -81,12 +76,10 @@ class SortedBytesDVWriter extends DocVal
public void finish(int maxDoc) {
if (pendingIndex < maxDoc) {
addOneValue(EMPTY);
- mergeLength(0);
}
}
private void addOneValue(BytesRef value) {
- mergeLength(value.length);
int ord = hash.add(value);
if (ord < 0) {
ord = -ord-1;
@@ -100,54 +93,102 @@ class SortedBytesDVWriter extends DocVal
pending[pendingIndex++] = ord;
}
- private void mergeLength(int length) {
- if (fixedLength == -2) {
- fixedLength = length;
- } else if (fixedLength != length) {
- fixedLength = -1;
- }
- maxLength = Math.max(maxLength, length);
- }
-
@Override
public void flush(SegmentWriteState state, SimpleDVConsumer dvConsumer) throws IOException {
- SortedDocValuesConsumer consumer = dvConsumer.addSortedField(fieldInfo,
- hash.size(),
- fixedLength >= 0,
- maxLength);
final int maxDoc = state.segmentInfo.getDocCount();
- int emptyOrd = -1;
+
+ final int emptyOrd;
if (pendingIndex < maxDoc) {
// Make sure we added EMPTY value before sorting:
- emptyOrd = hash.add(EMPTY);
- if (emptyOrd < 0) {
- emptyOrd = -emptyOrd-1;
+ int ord = hash.add(EMPTY);
+ if (ord < 0) {
+ emptyOrd = -ord-1;
+ } else {
+ emptyOrd = ord;
}
+ } else {
+ emptyOrd = -1;
}
- int valueCount = hash.size();
+ final int valueCount = hash.size();
- int[] sortedValues = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
+ final int[] sortedValues = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
final int sortedValueRamUsage = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + RamUsageEstimator.NUM_BYTES_INT*valueCount;
iwBytesUsed.addAndGet(sortedValueRamUsage);
final int[] ordMap = new int[valueCount];
- // Write values, in sorted order:
- BytesRef scratch = new BytesRef();
+
for(int ord=0;ord<valueCount;ord++) {
- consumer.addValue(hash.get(sortedValues[ord], scratch));
ordMap[sortedValues[ord]] = ord;
}
+
final int bufferedDocCount = pendingIndex;
- for(int docID=0;docID<bufferedDocCount;docID++) {
- consumer.addDoc(ordMap[pending[docID]]);
- }
- for(int docID=bufferedDocCount;docID<maxDoc;docID++) {
- consumer.addDoc(ordMap[emptyOrd]);
- }
+ dvConsumer.addSortedField(fieldInfo,
+
+ // ord -> value
+ new Iterable<BytesRef>() {
+ @Override
+ public Iterator<BytesRef> iterator() {
+ return new Iterator<BytesRef>() {
+ int ordUpto;
+ BytesRef scratch = new BytesRef();
+
+ @Override
+ public boolean hasNext() {
+ return ordUpto < valueCount;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public BytesRef next() {
+ hash.get(sortedValues[ordUpto], scratch);
+ ordUpto++;
+ return scratch;
+ }
+ };
+ }
+ },
+
+ // doc -> ord
+ new Iterable<Number>() {
+ @Override
+ public Iterator<Number> iterator() {
+ return new Iterator<Number>() {
+ int docUpto;
+
+ @Override
+ public boolean hasNext() {
+ return docUpto < maxDoc;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Number next() {
+ int ord;
+ if (docUpto < bufferedDocCount) {
+ ord = pending[docUpto];
+ } else {
+ ord = emptyOrd;
+ }
+ docUpto++;
+ // nocommit make
+ // reusable Number?
+ return ordMap[ord];
+ }
+ };
+ }
+ });
+
iwBytesUsed.addAndGet(-sortedValueRamUsage);
reset();
- consumer.finish();
}
public void abort() {
@@ -159,7 +200,5 @@ class SortedBytesDVWriter extends DocVal
pending = ArrayUtil.shrink(pending, DEFAULT_PENDING_SIZE);
pendingIndex = 0;
hash.clear();
- fixedLength = -2;
- maxLength = 0;
}
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java Mon Jan 7 23:41:09 2013
@@ -30,10 +30,6 @@ public abstract class SortedDocValues ex
public abstract int getValueCount();
- public abstract boolean isFixedLength();
-
- public abstract int maxLength();
-
@Override
public void get(int docID, BytesRef result) {
int ord = getOrd(docID);
@@ -174,16 +170,6 @@ public abstract class SortedDocValues ex
public int size() {
return size;
}
-
- @Override
- public boolean isFixedLength() {
- return true;
- }
-
- @Override
- public int maxLength() {
- return 0;
- }
}
/** If {@code key} exists, returns its ordinal, else
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java Mon Jan 7 23:41:09 2013
@@ -937,16 +937,12 @@ class FieldCacheImpl implements FieldCac
private final PackedInts.Reader termOrdToBytesOffset;
private final PackedInts.Reader docToTermOrd;
private final int numOrd;
- private final int maxLength;
- private final boolean isFixedLength;
- public SortedDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd, int maxLength, boolean isFixedLength) {
+ public SortedDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) {
this.bytes = bytes;
this.docToTermOrd = docToTermOrd;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.numOrd = numOrd;
- this.maxLength = maxLength;
- this.isFixedLength = isFixedLength;
}
@Override
@@ -976,16 +972,6 @@ class FieldCacheImpl implements FieldCac
}
@Override
- public boolean isFixedLength() {
- return isFixedLength;
- }
-
- @Override
- public int maxLength() {
- return maxLength;
- }
-
- @Override
public TermsEnum getTermsEnum() {
return this.new SortedDocValuesEnum();
}
@@ -1207,9 +1193,6 @@ class FieldCacheImpl implements FieldCac
int termOrd = 0;
- int sameLength = -2;
- int maxLength = -1;
-
// TODO: use Uninvert?
if (terms != null) {
@@ -1221,12 +1204,6 @@ class FieldCacheImpl implements FieldCac
if (term == null) {
break;
}
- if (sameLength == -2) {
- sameLength = term.length;
- } else if (sameLength != term.length) {
- sameLength = -1;
- }
- maxLength = Math.max(maxLength, term.length);
if (termOrd >= termCountHardLimit) {
break;
}
@@ -1256,7 +1233,7 @@ class FieldCacheImpl implements FieldCac
}
// maybe an int-only impl?
- return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd, maxLength, sameLength >= 0);
+ return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd);
}
}
}
@@ -1264,14 +1241,10 @@ class FieldCacheImpl implements FieldCac
private static class BinaryDocValuesImpl extends BinaryDocValues {
private final PagedBytes.Reader bytes;
private final PackedInts.Reader docToOffset;
- private final int maxLength;
- private final boolean isFixedLength;
- public BinaryDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset, int maxLength, boolean isFixedLength) {
+ public BinaryDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset) {
this.bytes = bytes;
this.docToOffset = docToOffset;
- this.maxLength = maxLength;
- this.isFixedLength = isFixedLength;
}
@Override
@@ -1353,9 +1326,6 @@ class FieldCacheImpl implements FieldCac
// pointer==0 means not set
bytes.copyUsingLengthPrefix(new BytesRef());
- int sameLength = -2;
- int maxLength = -1;
-
if (terms != null) {
int termCount = 0;
final TermsEnum termsEnum = terms.iterator(null);
@@ -1372,12 +1342,6 @@ class FieldCacheImpl implements FieldCac
if (term == null) {
break;
}
- if (sameLength == -2) {
- sameLength = term.length;
- } else if (sameLength != term.length) {
- sameLength = -1;
- }
- maxLength = Math.max(maxLength, term.length);
final long pointer = bytes.copyUsingLengthPrefix(term);
docs = termsEnum.docs(null, docs, 0);
while (true) {
@@ -1391,7 +1355,7 @@ class FieldCacheImpl implements FieldCac
}
// maybe an int-only impl?
- return new BinaryDocValuesImpl(bytes.freeze(true), docToOffset.getMutable(), maxLength, sameLength >= 0);
+ return new BinaryDocValuesImpl(bytes.freeze(true), docToOffset.getMutable());
}
}
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java Mon Jan 7 23:41:09 2013
@@ -44,7 +44,6 @@ import org.apache.lucene.store.Directory
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase;
-import org.junit.Ignore;
/**
* A very simple demo used in the API documentation (src/java/overview.html).
@@ -594,7 +593,6 @@ public class TestDemoDocValue extends Lu
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
- assertTrue(dv.isFixedLength()); // "hello world 1" length == "hello world 2" length
assertEquals(2, dv.getValueCount()); // 2 ords
BytesRef scratch = new BytesRef();
dv.lookupOrd(0, scratch);