You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/02/11 05:55:55 UTC
svn commit: r1444655 - in /lucene/dev/branches/lucene4765/lucene/core/src:
java/org/apache/lucene/codecs/ java/org/apache/lucene/codecs/lucene42/
java/org/apache/lucene/index/ test/org/apache/lucene/
Author: rmuir
Date: Mon Feb 11 04:55:55 2013
New Revision: 1444655
URL: http://svn.apache.org/r1444655
Log:
add test vs. stored fields and fix bugs
Modified:
lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java
Modified: lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java?rev=1444655&r1=1444654&r2=1444655&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java Mon Feb 11 04:55:55 2013
@@ -573,6 +573,7 @@ public abstract class DocValuesConsumer
nextIsSet = true;
return true;
} else {
+ iterator = null;
docIDUpto++;
}
}
Modified: lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java?rev=1444655&r1=1444654&r2=1444655&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java Mon Feb 11 04:55:55 2013
@@ -320,7 +320,7 @@ class Lucene42DocValuesProducer extends
} else {
iterator = new Lucene42OrdsIterator(docToOrds);
}
- iterator.reset(docID);
+ iterator.reset(docToOrds, docID);
return iterator;
}
@@ -363,7 +363,7 @@ class Lucene42DocValuesProducer extends
}
static class Lucene42OrdsIterator extends OrdIterator {
- final BinaryDocValues data;
+ BinaryDocValues data;
final BytesRef ref = new BytesRef();
final ByteArrayDataInput input = new ByteArrayDataInput();
long currentOrd;
@@ -382,7 +382,8 @@ class Lucene42DocValuesProducer extends
}
}
- void reset(int docid) {
+ void reset(BinaryDocValues data, int docid) {
+ this.data = data;
data.get(docid, ref);
input.reset(ref.bytes, ref.offset, ref.length);
currentOrd = 0;
Modified: lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java?rev=1444655&r1=1444654&r2=1444655&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java Mon Feb 11 04:55:55 2013
@@ -39,6 +39,7 @@ public abstract class SortedSetDocValues
* @return iterator over ordinals for the document: these are dense,
* start at 0, then increment by 1 for the next value in sorted order.
*/
+ // nocommit: can we think of a better api? this asks for reuse bugs etc.
public abstract OrdIterator getOrds(int docID, OrdIterator reuse);
/** Retrieves the value for the specified ordinal.
Modified: lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java?rev=1444655&r1=1444654&r2=1444655&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java Mon Feb 11 04:55:55 2013
@@ -250,7 +250,7 @@ class SortedSetDocValuesWriter extends D
if (!hasNext()) {
throw new NoSuchElementException();
}
- if (currentUpto == currentLength) {
+ while (currentUpto == currentLength) {
// refill next doc, and sort remapped ords within the doc.
currentUpto = 0;
currentLength = (int) counts.next();
Modified: lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java?rev=1444655&r1=1444654&r2=1444655&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java Mon Feb 11 04:55:55 2013
@@ -18,17 +18,28 @@ package org.apache.lucene;
*/
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Set;
+import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.Term;
import org.apache.lucene.index.SortedSetDocValues.OrdIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@@ -388,4 +399,93 @@ public class TestDemoDocValue extends Lu
ireader.close();
directory.close();
}
+
+  /** Indexes random docs with the same string set as stored fields and sorted-set doc values, then checks both agree. */
+  private void doTestSortedSetVsStoredFields(int minLength, int maxLength) throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
+
+    // index some docs; each gets an unstored "id" term so it can be targeted by the deletions below
+    int numDocs = atLeast(1000);
+    for (int i = 0; i < numDocs; i++) {
+      Document doc = new Document();
+      Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
+      doc.add(idField);
+      final int length;
+      if (minLength == maxLength) {
+        length = minLength; // fixed length
+      } else {
+        length = _TestUtil.nextInt(random(), minLength, maxLength);
+      }
+      int numValues = random().nextInt(17);
+      // create a random set of strings; the TreeSet de-duplicates them and iterates in sorted order
+      Set<String> values = new TreeSet<String>();
+      for (int v = 0; v < numValues; v++) {
+        values.add(_TestUtil.randomSimpleString(random(), length));
+      }
+
+      // add ordered to the stored field: this is the expected order checked in the compare step
+      for (String v : values) {
+        doc.add(new StoredField("stored", v));
+      }
+
+      // add in any order to the dv field: the ord iteration order must not depend on insertion order
+      ArrayList<String> unordered = new ArrayList<String>(values);
+      Collections.shuffle(unordered, random());
+      for (String v : unordered) {
+        doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
+      }
+
+      writer.addDocument(doc);
+      if (random().nextInt(31) == 0) {
+        writer.commit();
+      }
+    }
+
+    // delete some docs (ids are picked at random, so the same id may be chosen more than once)
+    int numDeletions = random().nextInt(numDocs/10);
+    for (int i = 0; i < numDeletions; i++) {
+      int id = random().nextInt(numDocs);
+      writer.deleteDocuments(new Term("id", Integer.toString(id)));
+    }
+    writer.close();
+
+    // compare per segment, using JUnit assertions so every check (and nextOrd() call) runs even without -ea
+    DirectoryReader ir = DirectoryReader.open(dir);
+    for (AtomicReaderContext context : ir.leaves()) {
+      AtomicReader r = context.reader();
+      SortedSetDocValues docValues = r.getSortedSetDocValues("dv");
+      OrdIterator ords = null;
+      BytesRef scratch = new BytesRef();
+      for (int i = 0; i < r.maxDoc(); i++) {
+        String stringValues[] = r.document(i).getValues("stored");
+        ords = docValues.getOrds(i, ords);
+        for (int j = 0; j < stringValues.length; j++) {
+          long ord = ords.nextOrd();
+          assertTrue(ord != OrdIterator.NO_MORE_ORDS);
+          docValues.lookupOrd(ord, scratch);
+          assertEquals(stringValues[j], scratch.utf8ToString());
+        }
+        assertEquals(OrdIterator.NO_MORE_ORDS, ords.nextOrd());
+      }
+    }
+    ir.close();
+    dir.close();
+  }
+  /** Runs the stored-fields comparison with minLength == maxLength, picking one random length per round. */
+  public void testSortedSetFixedLengthVsStoredFields() throws Exception {
+    final int rounds = atLeast(1);
+    for (int round = 0; round < rounds; round++) {
+      final int len = _TestUtil.nextInt(random(), 1, 10);
+      doTestSortedSetVsStoredFields(len, len);
+    }
+  }
+  /** Runs the stored-fields comparison with minLength 1 and maxLength 10 on every round. */
+  public void testSortedSetVariableLengthVsStoredFields() throws Exception {
+    final int rounds = atLeast(1);
+    for (int round = 0; round < rounds; round++) {
+      doTestSortedSetVsStoredFields(1, 10);
+    }
+  }
}