You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/02/17 01:24:31 UTC

svn commit: r1446982 - in /lucene/dev/branches/lucene4765/lucene: core/src/java/org/apache/lucene/index/ core/src/test/org/apache/lucene/ core/src/test/org/apache/lucene/index/ core/src/test/org/apache/lucene/search/ test-framework/src/java/org/apache/...

Author: rmuir
Date: Sun Feb 17 00:24:30 2013
New Revision: 1446982

URL: http://svn.apache.org/r1446982
Log:
remove all nocommits

Removed:
    lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java
Modified:
    lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
    lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
    lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
    lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java
    lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
    lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java

Modified: lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java?rev=1446982&r1=1446981&r2=1446982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java Sun Feb 17 00:24:30 2013
@@ -265,6 +265,10 @@ public class DocTermOrds {
 
   /** Call this only once (if you subclass!) */
   protected void uninvert(final AtomicReader reader, final BytesRef termPrefix) throws IOException {
+    final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
+    if (info != null && info.hasDocValues()) {
+      throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
+    }
     //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
     final long startTime = System.currentTimeMillis();
     prefix = termPrefix == null ? null : BytesRef.deepCopyOf(termPrefix);

Modified: lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java?rev=1446982&r1=1446981&r2=1446982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java Sun Feb 17 00:24:30 2013
@@ -26,8 +26,6 @@ import org.apache.lucene.util.BytesRef;
 /** Implements a {@link TermsEnum} wrapping a provided
  * {@link SortedSetDocValues}. */
 
-// nocommit: if we are ok with ords being 'long' for SortedDocValues,
-// then we don't need this...
 public class SortedSetDocValuesTermsEnum extends TermsEnum {
   private final SortedSetDocValues values;
   private long currentOrd = -1;

Modified: lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java?rev=1446982&r1=1446981&r2=1446982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java Sun Feb 17 00:24:30 2013
@@ -30,6 +30,8 @@ import java.util.TreeSet;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.index.TermsEnum.SeekStatus;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
@@ -135,7 +137,14 @@ public class TestDuelingCodecs extends L
     // TODO: we should add other fields that use things like docs&freqs but omit positions,
     // because linefiledocs doesn't cover all the possibilities.
     for (int i = 0; i < numdocs; i++) {
-      writer.addDocument(lineFileDocs.nextDoc());
+      Document document = lineFileDocs.nextDoc();
+      // grab the title and add some SortedSet instances for fun
+      String title = document.get("titleTokenized");
+      String split[] = title.split("\\s+");
+      for (String trash : split) {
+        document.add(new SortedSetDocValuesField("sortedset", new BytesRef(trash)));
+      }
+      writer.addDocument(document);
     }
     
     lineFileDocs.close();
@@ -628,7 +637,6 @@ public class TestDuelingCodecs extends L
     Set<String> rightFields = getDVFields(rightReader);
     assertEquals(info, leftFields, rightFields);
 
-    // nocommit: SortedSet too
     for (String field : leftFields) {
       // TODO: clean this up... very messy
       {
@@ -684,6 +692,36 @@ public class TestDuelingCodecs extends L
           assertNull(info, rightValues);
         }
       }
+      
+      {
+        SortedSetDocValues leftValues = MultiDocValues.getSortedSetValues(leftReader, field);
+        SortedSetDocValues rightValues = MultiDocValues.getSortedSetValues(rightReader, field);
+        if (leftValues != null && rightValues != null) {
+          // numOrds
+          assertEquals(info, leftValues.getValueCount(), rightValues.getValueCount());
+          // ords
+          BytesRef scratchLeft = new BytesRef();
+          BytesRef scratchRight = new BytesRef();
+          for (int i = 0; i < leftValues.getValueCount(); i++) {
+            leftValues.lookupOrd(i, scratchLeft);
+            rightValues.lookupOrd(i, scratchRight);
+            assertEquals(info, scratchLeft, scratchRight);
+          }
+          // ord lists
+          for(int docID=0;docID<leftReader.maxDoc();docID++) {
+            leftValues.setDocument(docID);
+            rightValues.setDocument(docID);
+            long ord;
+            while ((ord = leftValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+              assertEquals(info, ord, rightValues.nextOrd());
+            }
+            assertEquals(info, SortedSetDocValues.NO_MORE_ORDS, rightValues.nextOrd());
+          }
+        } else {
+          assertNull(info, leftValues);
+          assertNull(info, rightValues);
+        }
+      }
     }
   }
   

Modified: lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java?rev=1446982&r1=1446981&r2=1446982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java Sun Feb 17 00:24:30 2013
@@ -435,15 +435,15 @@ public class TestFieldCache extends Luce
   public void testDocValuesIntegration() throws Exception {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
-    // nocommit
-    iwc.setCodec(_TestUtil.alwaysDocValuesFormat(DocValuesFormat.forName("Asserting"))); 
     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
     Document doc = new Document();
     doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
     doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
     doc.add(new NumericDocValuesField("numeric", 42));
-    doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
-    doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
+    if (defaultCodecSupportsSortedSet()) {
+      doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
+      doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
+    }
     iw.addDocument(doc);
     DirectoryReader ir = iw.getReader();
     iw.close();
@@ -471,6 +471,11 @@ public class TestFieldCache extends Luce
       fail();
     } catch (IllegalStateException expected) {}
     
+    try {
+      new DocTermOrds(ar, "binary");
+      fail();
+    } catch (IllegalStateException expected) {}
+    
     Bits bits = FieldCache.DEFAULT.getDocsWithField(ar, "binary");
     assertTrue(bits instanceof Bits.MatchAllBits);
     
@@ -480,6 +485,11 @@ public class TestFieldCache extends Luce
       fail();
     } catch (IllegalStateException expected) {}
     
+    try {
+      new DocTermOrds(ar, "sorted");
+      fail();
+    } catch (IllegalStateException expected) {}
+    
     binary = FieldCache.DEFAULT.getTerms(ar, "sorted");
     binary.get(0, scratch);
     assertEquals("sorted value", scratch.utf8ToString());
@@ -518,35 +528,47 @@ public class TestFieldCache extends Luce
       fail();
     } catch (IllegalStateException expected) {}
     
-    bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric");
-    assertTrue(bits instanceof Bits.MatchAllBits);
-    
-    // SortedSet type: can be retrieved via getDocTermOrds() 
     try {
-      FieldCache.DEFAULT.getInts(ar, "sortedset", false);
+      new DocTermOrds(ar, "numeric");
       fail();
     } catch (IllegalStateException expected) {}
     
-    try {
-      FieldCache.DEFAULT.getTerms(ar, "sortedset");
-      fail();
-    } catch (IllegalStateException expected) {}
-    
-    try {
-      FieldCache.DEFAULT.getTermsIndex(ar, "sortedset");
-      fail();
-    } catch (IllegalStateException expected) {}
-    
-    sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset");
-    sortedSet.setDocument(0);
-    assertEquals(0, sortedSet.nextOrd());
-    assertEquals(1, sortedSet.nextOrd());
-    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
-    assertEquals(2, sortedSet.getValueCount());
-    
-    bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset");
+    bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric");
     assertTrue(bits instanceof Bits.MatchAllBits);
     
+    // SortedSet type: can be retrieved via getDocTermOrds() 
+    if (defaultCodecSupportsSortedSet()) {
+      try {
+        FieldCache.DEFAULT.getInts(ar, "sortedset", false);
+        fail();
+      } catch (IllegalStateException expected) {}
+    
+      try {
+        FieldCache.DEFAULT.getTerms(ar, "sortedset");
+        fail();
+      } catch (IllegalStateException expected) {}
+    
+      try {
+        FieldCache.DEFAULT.getTermsIndex(ar, "sortedset");
+        fail();
+      } catch (IllegalStateException expected) {}
+      
+      try {
+        new DocTermOrds(ar, "sortedset");
+        fail();
+      } catch (IllegalStateException expected) {}
+    
+      sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset");
+      sortedSet.setDocument(0);
+      assertEquals(0, sortedSet.nextOrd());
+      assertEquals(1, sortedSet.nextOrd());
+      assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
+      assertEquals(2, sortedSet.getValueCount());
+    
+      bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset");
+      assertTrue(bits instanceof Bits.MatchAllBits);
+    }
+    
     ir.close();
     dir.close();
   }

Modified: lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java?rev=1446982&r1=1446981&r2=1446982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java (original)
+++ lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java Sun Feb 17 00:24:30 2013
@@ -17,10 +17,16 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
+import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
+
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.TreeSet;
 import java.util.Map.Entry;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -34,12 +40,14 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FloatDocValuesField;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.FieldInfo.DocValuesType;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
@@ -1266,4 +1274,602 @@ public abstract class BaseDocValuesForma
       doTestSortedVsStoredFields(1, 10);
     }
   }
+  
+  public void testSortedSetOneValue() throws IOException {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    Directory directory = newDirectory();
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
+    
+    Document doc = new Document();
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+    iwriter.addDocument(doc);
+    
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+    
+    SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
+    
+    dv.setDocument(0);
+    assertEquals(0, dv.nextOrd());
+    assertEquals(NO_MORE_ORDS, dv.nextOrd());
+    
+    BytesRef bytes = new BytesRef();
+    dv.lookupOrd(0, bytes);
+    assertEquals(new BytesRef("hello"), bytes);
+
+    ireader.close();
+    directory.close();
+  }
+  
+  public void testSortedSetTwoFields() throws IOException {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    Directory directory = newDirectory();
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
+    
+    Document doc = new Document();
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+    doc.add(new SortedSetDocValuesField("field2", new BytesRef("world")));
+    iwriter.addDocument(doc);
+    
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+    
+    SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
+    
+    dv.setDocument(0);
+    assertEquals(0, dv.nextOrd());
+    assertEquals(NO_MORE_ORDS, dv.nextOrd());
+    
+    BytesRef bytes = new BytesRef();
+    dv.lookupOrd(0, bytes);
+    assertEquals(new BytesRef("hello"), bytes);
+    
+    dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field2");
+
+    dv.setDocument(0);
+    assertEquals(0, dv.nextOrd());
+    assertEquals(NO_MORE_ORDS, dv.nextOrd());
+    
+    dv.lookupOrd(0, bytes);
+    assertEquals(new BytesRef("world"), bytes);
+    
+    ireader.close();
+    directory.close();
+  }
+  
+  public void testSortedSetTwoDocumentsMerged() throws IOException {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+  
+    Document doc = new Document();
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+    iwriter.addDocument(doc);
+    iwriter.commit();
+    
+    doc = new Document();
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
+    iwriter.addDocument(doc);
+    iwriter.forceMerge(1);
+    
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+
+    SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
+    assertEquals(2, dv.getValueCount());
+    
+    dv.setDocument(0);
+    assertEquals(0, dv.nextOrd());
+    assertEquals(NO_MORE_ORDS, dv.nextOrd());
+    
+    BytesRef bytes = new BytesRef();
+    dv.lookupOrd(0, bytes);
+    assertEquals(new BytesRef("hello"), bytes);
+    
+    dv.setDocument(1);
+    assertEquals(1, dv.nextOrd());
+    assertEquals(NO_MORE_ORDS, dv.nextOrd());
+    
+    dv.lookupOrd(1, bytes);
+    assertEquals(new BytesRef("world"), bytes);   
+
+    ireader.close();
+    directory.close();
+  }
+  
+  public void testSortedSetTwoValues() throws IOException {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    Directory directory = newDirectory();
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
+    
+    Document doc = new Document();
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
+    iwriter.addDocument(doc);
+    
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+    
+    SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
+    
+    dv.setDocument(0);
+    assertEquals(0, dv.nextOrd());
+    assertEquals(1, dv.nextOrd());
+    assertEquals(NO_MORE_ORDS, dv.nextOrd());
+    
+    BytesRef bytes = new BytesRef();
+    dv.lookupOrd(0, bytes);
+    assertEquals(new BytesRef("hello"), bytes);
+    
+    dv.lookupOrd(1, bytes);
+    assertEquals(new BytesRef("world"), bytes);
+
+    ireader.close();
+    directory.close();
+  }
+  
+  public void testSortedSetTwoValuesUnordered() throws IOException {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    Directory directory = newDirectory();
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
+    
+    Document doc = new Document();
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+    iwriter.addDocument(doc);
+    
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+    
+    SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
+    
+    dv.setDocument(0);
+    assertEquals(0, dv.nextOrd());
+    assertEquals(1, dv.nextOrd());
+    assertEquals(NO_MORE_ORDS, dv.nextOrd());
+    
+    BytesRef bytes = new BytesRef();
+    dv.lookupOrd(0, bytes);
+    assertEquals(new BytesRef("hello"), bytes);
+    
+    dv.lookupOrd(1, bytes);
+    assertEquals(new BytesRef("world"), bytes);
+
+    ireader.close();
+    directory.close();
+  }
+  
+  public void testSortedSetThreeValuesTwoDocs() throws IOException {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+    
+    Document doc = new Document();
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
+    iwriter.addDocument(doc);
+    iwriter.commit();
+    
+    doc = new Document();
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("beer")));
+    iwriter.addDocument(doc);
+    iwriter.forceMerge(1);
+    
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+
+    SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
+    assertEquals(3, dv.getValueCount());
+    
+    dv.setDocument(0);
+    assertEquals(1, dv.nextOrd());
+    assertEquals(2, dv.nextOrd());
+    assertEquals(NO_MORE_ORDS, dv.nextOrd());
+    
+    dv.setDocument(1);
+    assertEquals(0, dv.nextOrd());
+    assertEquals(1, dv.nextOrd());
+    assertEquals(NO_MORE_ORDS, dv.nextOrd());
+    
+    BytesRef bytes = new BytesRef();
+    dv.lookupOrd(0, bytes);
+    assertEquals(new BytesRef("beer"), bytes);
+    
+    dv.lookupOrd(1, bytes);
+    assertEquals(new BytesRef("hello"), bytes);
+    
+    dv.lookupOrd(2, bytes);
+    assertEquals(new BytesRef("world"), bytes);
+
+    ireader.close();
+    directory.close();
+  }
+  
+  public void testSortedSetTwoDocumentsLastMissing() throws IOException {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+    
+    Document doc = new Document();
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+    iwriter.addDocument(doc);
+    
+    doc = new Document();
+    iwriter.addDocument(doc);
+    iwriter.forceMerge(1);
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+    
+    SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
+    assertEquals(1, dv.getValueCount());
+    
+    dv.setDocument(0);
+    assertEquals(0, dv.nextOrd());
+    assertEquals(NO_MORE_ORDS, dv.nextOrd());
+    
+    BytesRef bytes = new BytesRef();
+    dv.lookupOrd(0, bytes);
+    assertEquals(new BytesRef("hello"), bytes);
+    
+    ireader.close();
+    directory.close();
+  }
+  
+  public void testSortedSetTwoDocumentsLastMissingMerge() throws IOException {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+    
+    Document doc = new Document();
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+    iwriter.addDocument(doc);
+    iwriter.commit();
+    
+    doc = new Document();
+    iwriter.addDocument(doc);
+    iwriter.forceMerge(1);
+   
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+    
+    SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
+    assertEquals(1, dv.getValueCount());
+
+    dv.setDocument(0);
+    assertEquals(0, dv.nextOrd());
+    assertEquals(NO_MORE_ORDS, dv.nextOrd());
+    
+    BytesRef bytes = new BytesRef();
+    dv.lookupOrd(0, bytes);
+    assertEquals(new BytesRef("hello"), bytes);
+    
+    ireader.close();
+    directory.close();
+  }
+  
+  public void testSortedSetTwoDocumentsFirstMissing() throws IOException {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+    
+    Document doc = new Document();
+    iwriter.addDocument(doc);
+    
+    doc = new Document();
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+    iwriter.addDocument(doc);
+    
+    iwriter.forceMerge(1);
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+    
+    SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
+    assertEquals(1, dv.getValueCount());
+
+    dv.setDocument(1);
+    assertEquals(0, dv.nextOrd());
+    assertEquals(NO_MORE_ORDS, dv.nextOrd());
+    
+    BytesRef bytes = new BytesRef();
+    dv.lookupOrd(0, bytes);
+    assertEquals(new BytesRef("hello"), bytes);
+    
+    ireader.close();
+    directory.close();
+  }
+  
+  public void testSortedSetTwoDocumentsFirstMissingMerge() throws IOException {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+    
+    Document doc = new Document();
+    iwriter.addDocument(doc);
+    iwriter.commit();
+    
+    doc = new Document();
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+    iwriter.addDocument(doc);
+    iwriter.forceMerge(1);
+    
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+    
+    SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
+    assertEquals(1, dv.getValueCount());
+
+    dv.setDocument(1);
+    assertEquals(0, dv.nextOrd());
+    assertEquals(NO_MORE_ORDS, dv.nextOrd());
+    
+    BytesRef bytes = new BytesRef();
+    dv.lookupOrd(0, bytes);
+    assertEquals(new BytesRef("hello"), bytes);
+    
+    ireader.close();
+    directory.close();
+  }
+  
+  public void testSortedSetMergeAwayAllValues() throws IOException {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+    
+    Document doc = new Document();
+    doc.add(new StringField("id", "0", Field.Store.NO));
+    iwriter.addDocument(doc);    
+    doc = new Document();
+    doc.add(new StringField("id", "1", Field.Store.NO));
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+    iwriter.addDocument(doc);
+    iwriter.commit();
+    iwriter.deleteDocuments(new Term("id", "1"));
+    iwriter.forceMerge(1);
+    
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+    
+    SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
+    assertEquals(0, dv.getValueCount());
+    
+    ireader.close();
+    directory.close();
+  }
+  
+  private void doTestSortedSetVsStoredFields(int minLength, int maxLength) throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
+    
+    // index some docs
+    int numDocs = atLeast(1000);
+    for (int i = 0; i < numDocs; i++) {
+      Document doc = new Document();
+      Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
+      doc.add(idField);
+      final int length;
+      if (minLength == maxLength) {
+        length = minLength; // fixed length
+      } else {
+        length = _TestUtil.nextInt(random(), minLength, maxLength);
+      }
+      int numValues = random().nextInt(17);
+      // create a random set of strings
+      Set<String> values = new TreeSet<String>();
+      for (int v = 0; v < numValues; v++) {
+        values.add(_TestUtil.randomSimpleString(random(), length));
+      }
+      
+      // add ordered to the stored field
+      for (String v : values) {
+        doc.add(new StoredField("stored", v));
+      }
+
+      // add in any order to the dv field
+      ArrayList<String> unordered = new ArrayList<String>(values);
+      Collections.shuffle(unordered, random());
+      for (String v : unordered) {
+        doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
+      }
+
+      writer.addDocument(doc);
+      if (random().nextInt(31) == 0) {
+        writer.commit();
+      }
+    }
+    
+    // delete some docs
+    int numDeletions = random().nextInt(numDocs/10);
+    for (int i = 0; i < numDeletions; i++) {
+      int id = random().nextInt(numDocs);
+      writer.deleteDocuments(new Term("id", Integer.toString(id)));
+    }
+    writer.close();
+    
+    // compare
+    DirectoryReader ir = DirectoryReader.open(dir);
+    for (AtomicReaderContext context : ir.leaves()) {
+      AtomicReader r = context.reader();
+      SortedSetDocValues docValues = r.getSortedSetDocValues("dv");
+      BytesRef scratch = new BytesRef();
+      for (int i = 0; i < r.maxDoc(); i++) {
+        String stringValues[] = r.document(i).getValues("stored");
+        if (docValues != null) {
+          docValues.setDocument(i);
+        }
+        for (int j = 0; j < stringValues.length; j++) {
+          assert docValues != null;
+          long ord = docValues.nextOrd();
+          assert ord != NO_MORE_ORDS;
+          docValues.lookupOrd(ord, scratch);
+          assertEquals(stringValues[j], scratch.utf8ToString());
+        }
+        assert docValues == null || docValues.nextOrd() == NO_MORE_ORDS;
+      }
+    }
+    ir.close();
+    dir.close();
+  }
+  
+  public void testSortedSetFixedLengthVsStoredFields() throws Exception {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    int numIterations = atLeast(1);
+    for (int i = 0; i < numIterations; i++) {
+      int fixedLength = _TestUtil.nextInt(random(), 1, 10);
+      doTestSortedSetVsStoredFields(fixedLength, fixedLength);
+    }
+  }
+  
+  public void testSortedSetVariableLengthVsStoredFields() throws Exception {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    int numIterations = atLeast(1);
+    for (int i = 0; i < numIterations; i++) {
+      doTestSortedSetVsStoredFields(1, 10);
+    }
+  }
+  
+  /** Asserts that {@code actual} exposes the same per-document ord lists and the same ord dictionary as {@code expected}. */
+  private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
+    // actual can be null for the segment if no docs actually had any SortedSetDocValues; in this case FC.getDocTermOrds returns EMPTY
+    if (actual == null) {
+      assertEquals(SortedSetDocValues.EMPTY, expected);
+      return;
+    }
+    assertEquals(expected.getValueCount(), actual.getValueCount());
+    // compare ord lists: for every doc, both sides must yield the same ords in the same order
+    for (int i = 0; i < maxDoc; i++) {
+      expected.setDocument(i);
+      actual.setDocument(i);
+      long expectedOrd;
+      while ((expectedOrd = expected.nextOrd()) != NO_MORE_ORDS) {
+        assertEquals(expectedOrd, actual.nextOrd());
+      }
+      assertEquals(NO_MORE_ORDS, actual.nextOrd());
+    }
+    
+    // compare ord dictionary: resolve each ord to its term on both sides (ord -> term, so the loop index is actually used)
+    BytesRef expectedBytes = new BytesRef();
+    BytesRef actualBytes = new BytesRef();
+    for (long i = 0; i < expected.getValueCount(); i++) {
+      expected.lookupOrd(i, expectedBytes);
+      actual.lookupOrd(i, actualBytes);
+      assertEquals(expectedBytes, actualBytes);
+    }
+  }
+  
+  /**
+   * Indexes random documents that carry the same set of string values both as an
+   * indexed field ("indexed") and as a SORTED_SET docvalues field ("dv"), then checks
+   * that {@code FieldCache.DEFAULT.getDocTermOrds} on the indexed field matches the
+   * docvalues field: first per segment, then again after random deletions and a
+   * {@code forceMerge(1)}.
+   *
+   * @param minLength lower bound for the per-document length passed to the string generator
+   * @param maxLength upper bound for that length; equal to {@code minLength} for fixed length
+   */
+  private void doTestSortedSetVsUninvertedField(int minLength, int maxLength) throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
+    
+    // index some docs
+    int numDocs = atLeast(1000);
+    for (int i = 0; i < numDocs; i++) {
+      Document doc = new Document();
+      // "id" is only used to pick documents for deletion further down
+      Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
+      doc.add(idField);
+      final int length;
+      if (minLength == maxLength) {
+        length = minLength; // fixed length
+      } else {
+        length = _TestUtil.nextInt(random(), minLength, maxLength);
+      }
+      // 0..16 values per document
+      int numValues = random().nextInt(17);
+      // create a random list of strings
+      List<String> values = new ArrayList<String>();
+      for (int v = 0; v < numValues; v++) {
+        values.add(_TestUtil.randomSimpleString(random(), length));
+      }
+      
+      // add in any order to the indexed field
+      ArrayList<String> unordered = new ArrayList<String>(values);
+      Collections.shuffle(unordered, random());
+      // iterate the shuffled copy, not the original list, so the order really is random
+      for (String v : unordered) {
+        doc.add(newStringField("indexed", v, Field.Store.NO));
+      }
+
+      // add in any order to the dv field
+      ArrayList<String> unordered2 = new ArrayList<String>(values);
+      Collections.shuffle(unordered2, random());
+      for (String v : unordered2) {
+        doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
+      }
+
+      writer.addDocument(doc);
+      // occasionally commit (1 in 31 chance per document)
+      if (random().nextInt(31) == 0) {
+        writer.commit();
+      }
+    }
+    
+    // compare per-segment
+    // NOTE: we must do this before deleting, because FC.getDocTermsOrds/UninvertedField
+    // "bakes in" the deletes at the time it was first called.
+    DirectoryReader ir = writer.getReader();
+    for (AtomicReaderContext context : ir.leaves()) {
+      AtomicReader r = context.reader();
+      SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed");
+      SortedSetDocValues actual = r.getSortedSetDocValues("dv");
+      assertEquals(r.maxDoc(), expected, actual);
+    }
+    ir.close();
+    
+    // delete some docs
+    int numDeletions = random().nextInt(numDocs/10);
+    for (int i = 0; i < numDeletions; i++) {
+      int id = random().nextInt(numDocs);
+      writer.deleteDocuments(new Term("id", Integer.toString(id)));
+    }
+    
+    writer.forceMerge(1);
+    
+    // now compare again after the merge
+    ir = writer.getReader();
+    AtomicReader ar = getOnlySegmentReader(ir);
+    SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed");
+    SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
+    assertEquals(ir.maxDoc(), expected, actual);
+    ir.close();
+    
+    writer.close();
+    dir.close();
+  }
+  
+  /**
+   * Runs {@code doTestSortedSetVsUninvertedField} at least once, each time passing a
+   * single randomly chosen length (1..10) as both the minimum and the maximum.
+   * Guarded by an assumption that the default codec supports SORTED_SET.
+   */
+  public void testSortedSetFixedLengthVsUninvertedField() throws Exception {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    int remaining = atLeast(1);
+    while (remaining-- > 0) {
+      // equal bounds select the fixed-length branch of the helper
+      final int len = _TestUtil.nextInt(random(), 1, 10);
+      doTestSortedSetVsUninvertedField(len, len);
+    }
+  }
+  
+  /**
+   * Runs {@code doTestSortedSetVsUninvertedField} at least once with a minimum length
+   * of 1 and a maximum length of 10.
+   * Guarded by an assumption that the default codec supports SORTED_SET.
+   */
+  public void testSortedSetVariableLengthVsUninvertedField() throws Exception {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    int remaining = atLeast(1);
+    while (remaining-- > 0) {
+      doTestSortedSetVsUninvertedField(1, 10);
+    }
+  }
 }

Modified: lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java?rev=1446982&r1=1446981&r2=1446982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java Sun Feb 17 00:24:30 2013
@@ -26,6 +26,7 @@ import java.util.concurrent.*;
 import java.util.logging.Logger;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -1296,4 +1297,13 @@ public abstract class LuceneTestCase ext
       throw new IOException("Cannot find resource: " + name);
     }
   }
+  
+  /** Reports whether the default codec is one that handles SORTED_SET docvalues. */
+  public static boolean defaultCodecSupportsSortedSet() {
+    final String codecName = Codec.getDefault().getName();
+    // only these two codec names are reported as lacking SORTED_SET support
+    final boolean unsupported = codecName.equals("Lucene40")
+        || codecName.equals("Lucene41");
+    return !unsupported;
+  }
 }