You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/02/17 01:24:31 UTC
svn commit: r1446982 - in /lucene/dev/branches/lucene4765/lucene:
core/src/java/org/apache/lucene/index/ core/src/test/org/apache/lucene/
core/src/test/org/apache/lucene/index/
core/src/test/org/apache/lucene/search/
test-framework/src/java/org/apache/...
Author: rmuir
Date: Sun Feb 17 00:24:30 2013
New Revision: 1446982
URL: http://svn.apache.org/r1446982
Log:
remove all nocommits
Removed:
lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java
Modified:
lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java
lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
Modified: lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java?rev=1446982&r1=1446981&r2=1446982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java Sun Feb 17 00:24:30 2013
@@ -265,6 +265,10 @@ public class DocTermOrds {
/** Call this only once (if you subclass!) */
protected void uninvert(final AtomicReader reader, final BytesRef termPrefix) throws IOException {
+ final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
+ if (info != null && info.hasDocValues()) {
+ throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
+ }
//System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
final long startTime = System.currentTimeMillis();
prefix = termPrefix == null ? null : BytesRef.deepCopyOf(termPrefix);
Modified: lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java?rev=1446982&r1=1446981&r2=1446982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java Sun Feb 17 00:24:30 2013
@@ -26,8 +26,6 @@ import org.apache.lucene.util.BytesRef;
/** Implements a {@link TermsEnum} wrapping a provided
* {@link SortedSetDocValues}. */
-// nocommit: if we are ok with ords being 'long' for SortedDocValues,
-// then we don't need this...
public class SortedSetDocValuesTermsEnum extends TermsEnum {
private final SortedSetDocValues values;
private long currentOrd = -1;
Modified: lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java?rev=1446982&r1=1446981&r2=1446982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java Sun Feb 17 00:24:30 2013
@@ -30,6 +30,8 @@ import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
@@ -135,7 +137,14 @@ public class TestDuelingCodecs extends L
// TODO: we should add other fields that use things like docs&freqs but omit positions,
// because linefiledocs doesn't cover all the possibilities.
for (int i = 0; i < numdocs; i++) {
- writer.addDocument(lineFileDocs.nextDoc());
+ Document document = lineFileDocs.nextDoc();
+ // grab the title and add some SortedSet instances for fun
+ String title = document.get("titleTokenized");
+ String split[] = title.split("\\s+");
+ for (String trash : split) {
+ document.add(new SortedSetDocValuesField("sortedset", new BytesRef(trash)));
+ }
+ writer.addDocument(document);
}
lineFileDocs.close();
@@ -628,7 +637,6 @@ public class TestDuelingCodecs extends L
Set<String> rightFields = getDVFields(rightReader);
assertEquals(info, leftFields, rightFields);
- // nocommit: SortedSet too
for (String field : leftFields) {
// TODO: clean this up... very messy
{
@@ -684,6 +692,36 @@ public class TestDuelingCodecs extends L
assertNull(info, rightValues);
}
}
+
+ {
+ SortedSetDocValues leftValues = MultiDocValues.getSortedSetValues(leftReader, field);
+ SortedSetDocValues rightValues = MultiDocValues.getSortedSetValues(rightReader, field);
+ if (leftValues != null && rightValues != null) {
+ // numOrds
+ assertEquals(info, leftValues.getValueCount(), rightValues.getValueCount());
+ // ords
+ BytesRef scratchLeft = new BytesRef();
+ BytesRef scratchRight = new BytesRef();
+ for (int i = 0; i < leftValues.getValueCount(); i++) {
+ leftValues.lookupOrd(i, scratchLeft);
+ rightValues.lookupOrd(i, scratchRight);
+ assertEquals(info, scratchLeft, scratchRight);
+ }
+ // ord lists
+ for(int docID=0;docID<leftReader.maxDoc();docID++) {
+ leftValues.setDocument(docID);
+ rightValues.setDocument(docID);
+ long ord;
+ while ((ord = leftValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+ assertEquals(info, ord, rightValues.nextOrd());
+ }
+ assertEquals(info, SortedSetDocValues.NO_MORE_ORDS, rightValues.nextOrd());
+ }
+ } else {
+ assertNull(info, leftValues);
+ assertNull(info, rightValues);
+ }
+ }
}
}
Modified: lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java?rev=1446982&r1=1446981&r2=1446982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java Sun Feb 17 00:24:30 2013
@@ -435,15 +435,15 @@ public class TestFieldCache extends Luce
public void testDocValuesIntegration() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
- // nocommit
- iwc.setCodec(_TestUtil.alwaysDocValuesFormat(DocValuesFormat.forName("Asserting")));
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
doc.add(new NumericDocValuesField("numeric", 42));
- doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
- doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
+ if (defaultCodecSupportsSortedSet()) {
+ doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
+ doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
+ }
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
@@ -471,6 +471,11 @@ public class TestFieldCache extends Luce
fail();
} catch (IllegalStateException expected) {}
+ try {
+ new DocTermOrds(ar, "binary");
+ fail();
+ } catch (IllegalStateException expected) {}
+
Bits bits = FieldCache.DEFAULT.getDocsWithField(ar, "binary");
assertTrue(bits instanceof Bits.MatchAllBits);
@@ -480,6 +485,11 @@ public class TestFieldCache extends Luce
fail();
} catch (IllegalStateException expected) {}
+ try {
+ new DocTermOrds(ar, "sorted");
+ fail();
+ } catch (IllegalStateException expected) {}
+
binary = FieldCache.DEFAULT.getTerms(ar, "sorted");
binary.get(0, scratch);
assertEquals("sorted value", scratch.utf8ToString());
@@ -518,35 +528,47 @@ public class TestFieldCache extends Luce
fail();
} catch (IllegalStateException expected) {}
- bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric");
- assertTrue(bits instanceof Bits.MatchAllBits);
-
- // SortedSet type: can be retrieved via getDocTermOrds()
try {
- FieldCache.DEFAULT.getInts(ar, "sortedset", false);
+ new DocTermOrds(ar, "numeric");
fail();
} catch (IllegalStateException expected) {}
- try {
- FieldCache.DEFAULT.getTerms(ar, "sortedset");
- fail();
- } catch (IllegalStateException expected) {}
-
- try {
- FieldCache.DEFAULT.getTermsIndex(ar, "sortedset");
- fail();
- } catch (IllegalStateException expected) {}
-
- sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset");
- sortedSet.setDocument(0);
- assertEquals(0, sortedSet.nextOrd());
- assertEquals(1, sortedSet.nextOrd());
- assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
- assertEquals(2, sortedSet.getValueCount());
-
- bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset");
+ bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric");
assertTrue(bits instanceof Bits.MatchAllBits);
+ // SortedSet type: can be retrieved via getDocTermOrds()
+ if (defaultCodecSupportsSortedSet()) {
+ try {
+ FieldCache.DEFAULT.getInts(ar, "sortedset", false);
+ fail();
+ } catch (IllegalStateException expected) {}
+
+ try {
+ FieldCache.DEFAULT.getTerms(ar, "sortedset");
+ fail();
+ } catch (IllegalStateException expected) {}
+
+ try {
+ FieldCache.DEFAULT.getTermsIndex(ar, "sortedset");
+ fail();
+ } catch (IllegalStateException expected) {}
+
+ try {
+ new DocTermOrds(ar, "sortedset");
+ fail();
+ } catch (IllegalStateException expected) {}
+
+ sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset");
+ sortedSet.setDocument(0);
+ assertEquals(0, sortedSet.nextOrd());
+ assertEquals(1, sortedSet.nextOrd());
+ assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
+ assertEquals(2, sortedSet.getValueCount());
+
+ bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset");
+ assertTrue(bits instanceof Bits.MatchAllBits);
+ }
+
ir.close();
dir.close();
}
Modified: lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java?rev=1446982&r1=1446981&r2=1446982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java (original)
+++ lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java Sun Feb 17 00:24:30 2013
@@ -17,10 +17,16 @@ package org.apache.lucene.index;
* limitations under the License.
*/
+import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
+
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.TreeSet;
import java.util.Map.Entry;
import org.apache.lucene.analysis.Analyzer;
@@ -34,12 +40,14 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
@@ -1266,4 +1274,602 @@ public abstract class BaseDocValuesForma
doTestSortedVsStoredFields(1, 10);
}
}
+
+ /** Indexes a single document with one SORTED_SET value and checks its ord list and term. */
+ public void testSortedSetOneValue() throws IOException {
+   assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+   Directory dir = newDirectory();
+   RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+
+   Document document = new Document();
+   document.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+   writer.addDocument(document);
+
+   DirectoryReader reader = writer.getReader();
+   writer.close();
+
+   SortedSetDocValues values = getOnlySegmentReader(reader).getSortedSetDocValues("field");
+
+   // document 0 carries exactly one ord
+   values.setDocument(0);
+   assertEquals(0, values.nextOrd());
+   assertEquals(NO_MORE_ORDS, values.nextOrd());
+
+   // ord 0 resolves back to the indexed term
+   BytesRef term = new BytesRef();
+   values.lookupOrd(0, term);
+   assertEquals(new BytesRef("hello"), term);
+
+   reader.close();
+   dir.close();
+ }
+
+ /** Indexes one document with two different SORTED_SET fields and checks each field on its own. */
+ public void testSortedSetTwoFields() throws IOException {
+   assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+   Directory dir = newDirectory();
+   RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+
+   Document document = new Document();
+   document.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+   document.add(new SortedSetDocValuesField("field2", new BytesRef("world")));
+   writer.addDocument(document);
+
+   DirectoryReader reader = writer.getReader();
+   writer.close();
+
+   // first field: a single ord that maps to "hello"
+   SortedSetDocValues values = getOnlySegmentReader(reader).getSortedSetDocValues("field");
+
+   values.setDocument(0);
+   assertEquals(0, values.nextOrd());
+   assertEquals(NO_MORE_ORDS, values.nextOrd());
+
+   BytesRef term = new BytesRef();
+   values.lookupOrd(0, term);
+   assertEquals(new BytesRef("hello"), term);
+
+   // second field: a single ord that maps to "world"
+   values = getOnlySegmentReader(reader).getSortedSetDocValues("field2");
+
+   values.setDocument(0);
+   assertEquals(0, values.nextOrd());
+   assertEquals(NO_MORE_ORDS, values.nextOrd());
+
+   values.lookupOrd(0, term);
+   assertEquals(new BytesRef("world"), term);
+
+   reader.close();
+   dir.close();
+ }
+
+ /**
+  * Indexes two single-valued documents with a commit in between, force-merges to one segment,
+  * and checks the merged ords and terms.
+  */
+ public void testSortedSetTwoDocumentsMerged() throws IOException {
+   assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+   Directory dir = newDirectory();
+   IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+   config.setMergePolicy(newLogMergePolicy());
+   RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
+
+   Document first = new Document();
+   first.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+   writer.addDocument(first);
+   writer.commit();
+
+   Document second = new Document();
+   second.add(new SortedSetDocValuesField("field", new BytesRef("world")));
+   writer.addDocument(second);
+   writer.forceMerge(1);
+
+   DirectoryReader reader = writer.getReader();
+   writer.close();
+
+   SortedSetDocValues values = getOnlySegmentReader(reader).getSortedSetDocValues("field");
+   assertEquals(2, values.getValueCount());
+
+   // document 0 -> ord 0 -> "hello"
+   values.setDocument(0);
+   assertEquals(0, values.nextOrd());
+   assertEquals(NO_MORE_ORDS, values.nextOrd());
+
+   BytesRef term = new BytesRef();
+   values.lookupOrd(0, term);
+   assertEquals(new BytesRef("hello"), term);
+
+   // document 1 -> ord 1 -> "world"
+   values.setDocument(1);
+   assertEquals(1, values.nextOrd());
+   assertEquals(NO_MORE_ORDS, values.nextOrd());
+
+   values.lookupOrd(1, term);
+   assertEquals(new BytesRef("world"), term);
+
+   reader.close();
+   dir.close();
+ }
+
+ /** Indexes one document with two SORTED_SET values added in sorted order and checks both ords. */
+ public void testSortedSetTwoValues() throws IOException {
+   assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+   Directory dir = newDirectory();
+   RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+
+   Document document = new Document();
+   document.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+   document.add(new SortedSetDocValuesField("field", new BytesRef("world")));
+   writer.addDocument(document);
+
+   DirectoryReader reader = writer.getReader();
+   writer.close();
+
+   SortedSetDocValues values = getOnlySegmentReader(reader).getSortedSetDocValues("field");
+
+   // the document exposes ords 0 and 1, then runs out
+   values.setDocument(0);
+   assertEquals(0, values.nextOrd());
+   assertEquals(1, values.nextOrd());
+   assertEquals(NO_MORE_ORDS, values.nextOrd());
+
+   BytesRef term = new BytesRef();
+   values.lookupOrd(0, term);
+   assertEquals(new BytesRef("hello"), term);
+
+   values.lookupOrd(1, term);
+   assertEquals(new BytesRef("world"), term);
+
+   reader.close();
+   dir.close();
+ }
+
+ /**
+  * Indexes one document whose two SORTED_SET values are added in reverse term order and checks
+  * that the ords still enumerate "hello" before "world".
+  */
+ public void testSortedSetTwoValuesUnordered() throws IOException {
+   assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+   Directory dir = newDirectory();
+   RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+
+   Document document = new Document();
+   document.add(new SortedSetDocValuesField("field", new BytesRef("world")));
+   document.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+   writer.addDocument(document);
+
+   DirectoryReader reader = writer.getReader();
+   writer.close();
+
+   SortedSetDocValues values = getOnlySegmentReader(reader).getSortedSetDocValues("field");
+
+   values.setDocument(0);
+   assertEquals(0, values.nextOrd());
+   assertEquals(1, values.nextOrd());
+   assertEquals(NO_MORE_ORDS, values.nextOrd());
+
+   // ord 0 is "hello" and ord 1 is "world" despite the insertion order
+   BytesRef term = new BytesRef();
+   values.lookupOrd(0, term);
+   assertEquals(new BytesRef("hello"), term);
+
+   values.lookupOrd(1, term);
+   assertEquals(new BytesRef("world"), term);
+
+   reader.close();
+   dir.close();
+ }
+
+ /**
+  * Indexes two two-valued documents that share one term, with a commit in between, force-merges,
+  * and checks the three-term dictionary and both per-document ord lists.
+  */
+ public void testSortedSetThreeValuesTwoDocs() throws IOException {
+   assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+   Directory dir = newDirectory();
+   IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+   config.setMergePolicy(newLogMergePolicy());
+   RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
+
+   Document first = new Document();
+   first.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+   first.add(new SortedSetDocValuesField("field", new BytesRef("world")));
+   writer.addDocument(first);
+   writer.commit();
+
+   Document second = new Document();
+   second.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+   second.add(new SortedSetDocValuesField("field", new BytesRef("beer")));
+   writer.addDocument(second);
+   writer.forceMerge(1);
+
+   DirectoryReader reader = writer.getReader();
+   writer.close();
+
+   SortedSetDocValues values = getOnlySegmentReader(reader).getSortedSetDocValues("field");
+   assertEquals(3, values.getValueCount());
+
+   // document 0 holds ords 1 and 2
+   values.setDocument(0);
+   assertEquals(1, values.nextOrd());
+   assertEquals(2, values.nextOrd());
+   assertEquals(NO_MORE_ORDS, values.nextOrd());
+
+   // document 1 holds ords 0 and 1
+   values.setDocument(1);
+   assertEquals(0, values.nextOrd());
+   assertEquals(1, values.nextOrd());
+   assertEquals(NO_MORE_ORDS, values.nextOrd());
+
+   // dictionary: 0 -> "beer", 1 -> "hello", 2 -> "world"
+   BytesRef term = new BytesRef();
+   values.lookupOrd(0, term);
+   assertEquals(new BytesRef("beer"), term);
+
+   values.lookupOrd(1, term);
+   assertEquals(new BytesRef("hello"), term);
+
+   values.lookupOrd(2, term);
+   assertEquals(new BytesRef("world"), term);
+
+   reader.close();
+   dir.close();
+ }
+
+ /**
+  * Indexes a valued document followed by an empty one (no commit in between), force-merges,
+  * and checks the single value of document 0.
+  */
+ public void testSortedSetTwoDocumentsLastMissing() throws IOException {
+   assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+   Directory dir = newDirectory();
+   IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+   config.setMergePolicy(newLogMergePolicy());
+   RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
+
+   Document withValue = new Document();
+   withValue.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+   writer.addDocument(withValue);
+
+   // second document has no fields at all
+   Document empty = new Document();
+   writer.addDocument(empty);
+   writer.forceMerge(1);
+   DirectoryReader reader = writer.getReader();
+   writer.close();
+
+   SortedSetDocValues values = getOnlySegmentReader(reader).getSortedSetDocValues("field");
+   assertEquals(1, values.getValueCount());
+
+   values.setDocument(0);
+   assertEquals(0, values.nextOrd());
+   assertEquals(NO_MORE_ORDS, values.nextOrd());
+
+   BytesRef term = new BytesRef();
+   values.lookupOrd(0, term);
+   assertEquals(new BytesRef("hello"), term);
+
+   reader.close();
+   dir.close();
+ }
+
+ /**
+  * Indexes a valued document, commits, adds an empty document, force-merges,
+  * and checks the single value of document 0.
+  */
+ public void testSortedSetTwoDocumentsLastMissingMerge() throws IOException {
+   assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+   Directory dir = newDirectory();
+   IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+   config.setMergePolicy(newLogMergePolicy());
+   RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
+
+   Document withValue = new Document();
+   withValue.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+   writer.addDocument(withValue);
+   writer.commit();
+
+   // second document has no fields at all
+   Document empty = new Document();
+   writer.addDocument(empty);
+   writer.forceMerge(1);
+
+   DirectoryReader reader = writer.getReader();
+   writer.close();
+
+   SortedSetDocValues values = getOnlySegmentReader(reader).getSortedSetDocValues("field");
+   assertEquals(1, values.getValueCount());
+
+   values.setDocument(0);
+   assertEquals(0, values.nextOrd());
+   assertEquals(NO_MORE_ORDS, values.nextOrd());
+
+   BytesRef term = new BytesRef();
+   values.lookupOrd(0, term);
+   assertEquals(new BytesRef("hello"), term);
+
+   reader.close();
+   dir.close();
+ }
+
+ /**
+  * Indexes an empty document followed by a valued one (no commit in between), force-merges,
+  * and checks the single value of document 1.
+  */
+ public void testSortedSetTwoDocumentsFirstMissing() throws IOException {
+   assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+   Directory dir = newDirectory();
+   IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+   config.setMergePolicy(newLogMergePolicy());
+   RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
+
+   // first document has no fields at all
+   Document empty = new Document();
+   writer.addDocument(empty);
+
+   Document withValue = new Document();
+   withValue.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+   writer.addDocument(withValue);
+
+   writer.forceMerge(1);
+   DirectoryReader reader = writer.getReader();
+   writer.close();
+
+   SortedSetDocValues values = getOnlySegmentReader(reader).getSortedSetDocValues("field");
+   assertEquals(1, values.getValueCount());
+
+   values.setDocument(1);
+   assertEquals(0, values.nextOrd());
+   assertEquals(NO_MORE_ORDS, values.nextOrd());
+
+   BytesRef term = new BytesRef();
+   values.lookupOrd(0, term);
+   assertEquals(new BytesRef("hello"), term);
+
+   reader.close();
+   dir.close();
+ }
+
+ /**
+  * Indexes an empty document, commits, adds a valued document, force-merges,
+  * and checks the single value of document 1.
+  */
+ public void testSortedSetTwoDocumentsFirstMissingMerge() throws IOException {
+   assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+   Directory dir = newDirectory();
+   IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+   config.setMergePolicy(newLogMergePolicy());
+   RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
+
+   // first document has no fields at all
+   Document empty = new Document();
+   writer.addDocument(empty);
+   writer.commit();
+
+   Document withValue = new Document();
+   withValue.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+   writer.addDocument(withValue);
+   writer.forceMerge(1);
+
+   DirectoryReader reader = writer.getReader();
+   writer.close();
+
+   SortedSetDocValues values = getOnlySegmentReader(reader).getSortedSetDocValues("field");
+   assertEquals(1, values.getValueCount());
+
+   values.setDocument(1);
+   assertEquals(0, values.nextOrd());
+   assertEquals(NO_MORE_ORDS, values.nextOrd());
+
+   BytesRef term = new BytesRef();
+   values.lookupOrd(0, term);
+   assertEquals(new BytesRef("hello"), term);
+
+   reader.close();
+   dir.close();
+ }
+
+ /**
+  * Deletes the only document that carries a SORTED_SET value, force-merges, and expects the
+  * merged segment to report a value count of zero for the field.
+  */
+ public void testSortedSetMergeAwayAllValues() throws IOException {
+   assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+   Directory dir = newDirectory();
+   IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+   config.setMergePolicy(newLogMergePolicy());
+   RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
+
+   // id 0: no docvalues
+   Document plain = new Document();
+   plain.add(new StringField("id", "0", Field.Store.NO));
+   writer.addDocument(plain);
+   // id 1: the only document with a value
+   Document valued = new Document();
+   valued.add(new StringField("id", "1", Field.Store.NO));
+   valued.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+   writer.addDocument(valued);
+   writer.commit();
+   writer.deleteDocuments(new Term("id", "1"));
+   writer.forceMerge(1);
+
+   DirectoryReader reader = writer.getReader();
+   writer.close();
+
+   SortedSetDocValues values = getOnlySegmentReader(reader).getSortedSetDocValues("field");
+   assertEquals(0, values.getValueCount());
+
+   reader.close();
+   dir.close();
+ }
+
+ private void doTestSortedSetVsStoredFields(int minLength, int maxLength) throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
+
+ // index some docs
+ int numDocs = atLeast(1000);
+ for (int i = 0; i < numDocs; i++) {
+ Document doc = new Document();
+ Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
+ doc.add(idField);
+ final int length;
+ if (minLength == maxLength) {
+ length = minLength; // fixed length
+ } else {
+ length = _TestUtil.nextInt(random(), minLength, maxLength);
+ }
+ int numValues = random().nextInt(17);
+ // create a random set of strings
+ Set<String> values = new TreeSet<String>();
+ for (int v = 0; v < numValues; v++) {
+ values.add(_TestUtil.randomSimpleString(random(), length));
+ }
+
+ // add ordered to the stored field
+ for (String v : values) {
+ doc.add(new StoredField("stored", v));
+ }
+
+ // add in any order to the dv field
+ ArrayList<String> unordered = new ArrayList<String>(values);
+ Collections.shuffle(unordered, random());
+ for (String v : unordered) {
+ doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
+ }
+
+ writer.addDocument(doc);
+ if (random().nextInt(31) == 0) {
+ writer.commit();
+ }
+ }
+
+ // delete some docs
+ int numDeletions = random().nextInt(numDocs/10);
+ for (int i = 0; i < numDeletions; i++) {
+ int id = random().nextInt(numDocs);
+ writer.deleteDocuments(new Term("id", Integer.toString(id)));
+ }
+ writer.close();
+
+ // compare
+ DirectoryReader ir = DirectoryReader.open(dir);
+ for (AtomicReaderContext context : ir.leaves()) {
+ AtomicReader r = context.reader();
+ SortedSetDocValues docValues = r.getSortedSetDocValues("dv");
+ BytesRef scratch = new BytesRef();
+ for (int i = 0; i < r.maxDoc(); i++) {
+ String stringValues[] = r.document(i).getValues("stored");
+ if (docValues != null) {
+ docValues.setDocument(i);
+ }
+ for (int j = 0; j < stringValues.length; j++) {
+ assert docValues != null;
+ long ord = docValues.nextOrd();
+ assert ord != NO_MORE_ORDS;
+ docValues.lookupOrd(ord, scratch);
+ assertEquals(stringValues[j], scratch.utf8ToString());
+ }
+ assert docValues == null || docValues.nextOrd() == NO_MORE_ORDS;
+ }
+ }
+ ir.close();
+ dir.close();
+ }
+
+ /** Runs the stored-fields comparison at least once, each round with one random fixed length in 1..10. */
+ public void testSortedSetFixedLengthVsStoredFields() throws Exception {
+   assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+   for (int remaining = atLeast(1); remaining > 0; remaining--) {
+     final int length = _TestUtil.nextInt(random(), 1, 10);
+     doTestSortedSetVsStoredFields(length, length);
+   }
+ }
+
+ /** Runs the stored-fields comparison at least once with length bounds 1 and 10. */
+ public void testSortedSetVariableLengthVsStoredFields() throws Exception {
+   assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+   for (int remaining = atLeast(1); remaining > 0; remaining--) {
+     doTestSortedSetVsStoredFields(1, 10);
+   }
+ }
+
+ /**
+  * Asserts that two {@link SortedSetDocValues} expose the same value count, the same ord list
+  * for every document in {@code [0, maxDoc)}, and the same term for every ord.
+  *
+  * @param maxDoc number of documents whose ord lists are compared
+  * @param expected reference values
+  * @param actual values under test; when null, {@code expected} must be
+  *        {@link SortedSetDocValues#EMPTY} and nothing else is compared
+  */
+ private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
+   // can be null for the segment if no docs actually had any SortedDocValues
+   // in this case FC.getDocTermsOrds returns EMPTY
+   if (actual == null) {
+     assertEquals(SortedSetDocValues.EMPTY, expected);
+     return;
+   }
+   assertEquals(expected.getValueCount(), actual.getValueCount());
+   // compare ord lists
+   for (int i = 0; i < maxDoc; i++) {
+     expected.setDocument(i);
+     actual.setDocument(i);
+     long expectedOrd;
+     while ((expectedOrd = expected.nextOrd()) != NO_MORE_ORDS) {
+       assertEquals(expectedOrd, actual.nextOrd());
+     }
+     assertEquals(NO_MORE_ORDS, actual.nextOrd());
+   }
+
+   // compare ord dictionary: resolve every ord on both sides and compare the terms.
+   // (lookupTerm on the empty scratch refs never filled them, so nothing was compared.)
+   BytesRef expectedBytes = new BytesRef();
+   BytesRef actualBytes = new BytesRef();
+   for (long i = 0; i < expected.getValueCount(); i++) {
+     expected.lookupOrd(i, expectedBytes);
+     actual.lookupOrd(i, actualBytes);
+     assertEquals(expectedBytes, actualBytes);
+   }
+ }
+
+ /**
+  * Indexes random documents that carry the same random strings in an indexed "indexed" field
+  * and in a "dv" SORTED_SET field (each in its own shuffled order), then compares
+  * {@code FieldCache.DEFAULT.getDocTermOrds} on the indexed field against the docvalues:
+  * first per segment, then again after random deletes and a force-merge to one segment.
+  *
+  * @param minLength lower bound for the per-document length handed to the string generator
+  * @param maxLength upper bound; when equal to {@code minLength} that exact length is always used
+  */
+ private void doTestSortedSetVsUninvertedField(int minLength, int maxLength) throws Exception {
+   Directory dir = newDirectory();
+   IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+   RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
+
+   // index some docs
+   int numDocs = atLeast(1000);
+   for (int i = 0; i < numDocs; i++) {
+     Document doc = new Document();
+     Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
+     doc.add(idField);
+     final int length;
+     if (minLength == maxLength) {
+       length = minLength; // fixed length
+     } else {
+       length = _TestUtil.nextInt(random(), minLength, maxLength);
+     }
+     int numValues = random().nextInt(17);
+     // create a random list of strings
+     List<String> values = new ArrayList<String>();
+     for (int v = 0; v < numValues; v++) {
+       values.add(_TestUtil.randomSimpleString(random(), length));
+     }
+
+     // add in any order to the indexed field
+     // (iterate the shuffled copy; the loop used to walk 'values', leaving the shuffle unused)
+     ArrayList<String> unordered = new ArrayList<String>(values);
+     Collections.shuffle(unordered, random());
+     for (String v : unordered) {
+       doc.add(newStringField("indexed", v, Field.Store.NO));
+     }
+
+     // add in any order to the dv field
+     ArrayList<String> unordered2 = new ArrayList<String>(values);
+     Collections.shuffle(unordered2, random());
+     for (String v : unordered2) {
+       doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
+     }
+
+     writer.addDocument(doc);
+     if (random().nextInt(31) == 0) {
+       writer.commit();
+     }
+   }
+
+   // compare per-segment
+   // NOTE: we must do this before deleting, because FC.getDocTermsOrds/UninvertedField
+   // "bakes in" the deletes at the time it was first called.
+   DirectoryReader ir = writer.getReader();
+   for (AtomicReaderContext context : ir.leaves()) {
+     AtomicReader r = context.reader();
+     SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed");
+     SortedSetDocValues actual = r.getSortedSetDocValues("dv");
+     assertEquals(r.maxDoc(), expected, actual);
+   }
+   ir.close();
+
+   // delete some docs
+   int numDeletions = random().nextInt(numDocs/10);
+   for (int i = 0; i < numDeletions; i++) {
+     int id = random().nextInt(numDocs);
+     writer.deleteDocuments(new Term("id", Integer.toString(id)));
+   }
+
+   writer.forceMerge(1);
+
+   // now compare again after the merge
+   ir = writer.getReader();
+   AtomicReader ar = getOnlySegmentReader(ir);
+   SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed");
+   SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
+   assertEquals(ir.maxDoc(), expected, actual);
+   ir.close();
+
+   writer.close();
+   dir.close();
+ }
+
+ /** Runs the uninverted-field comparison at least once, each round with one random fixed length in 1..10. */
+ public void testSortedSetFixedLengthVsUninvertedField() throws Exception {
+   assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+   for (int remaining = atLeast(1); remaining > 0; remaining--) {
+     final int length = _TestUtil.nextInt(random(), 1, 10);
+     doTestSortedSetVsUninvertedField(length, length);
+   }
+ }
+
+ /** Runs the uninverted-field comparison at least once with length bounds 1 and 10. */
+ public void testSortedSetVariableLengthVsUninvertedField() throws Exception {
+   assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+   for (int remaining = atLeast(1); remaining > 0; remaining--) {
+     doTestSortedSetVsUninvertedField(1, 10);
+   }
+ }
}
Modified: lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java?rev=1446982&r1=1446981&r2=1446982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java Sun Feb 17 00:24:30 2013
@@ -26,6 +26,7 @@ import java.util.concurrent.*;
import java.util.logging.Logger;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -1296,4 +1297,13 @@ public abstract class LuceneTestCase ext
throw new IOException("Cannot find resource: " + name);
}
}
+
+ /**
+  * Returns true if the default codec supports SORTED_SET docvalues.
+  * The codecs named "Lucene40" and "Lucene41" are reported as unsupported; any other name is
+  * reported as supported.
+  */
+ public static boolean defaultCodecSupportsSortedSet() {
+   final String codecName = Codec.getDefault().getName();
+   return !(codecName.equals("Lucene40") || codecName.equals("Lucene41"));
+ }
}