You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/02/11 16:43:25 UTC

svn commit: r1444835 - in /lucene/dev/branches/lucene4765/lucene: core/src/java/org/apache/lucene/index/ core/src/java/org/apache/lucene/search/ core/src/test/org/apache/lucene/search/ grouping/src/java/org/apache/lucene/search/grouping/term/ join/src/...

Author: rmuir
Date: Mon Feb 11 15:43:24 2013
New Revision: 1444835

URL: http://svn.apache.org/r1444835
Log:
add hack to begin incremental cutover

Modified:
    lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
    lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCache.java
    lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
    lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java
    lucene/dev/branches/lucene4765/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java
    lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java
    lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java
    lucene/dev/branches/lucene4765/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java

Modified: lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java?rev=1444835&r1=1444834&r2=1444835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java Mon Feb 11 15:43:24 2013
@@ -847,4 +847,67 @@ public class DocTermOrds {
     termsEnum.seekExact(ord);
     return termsEnum.term();
   }
+  
+  /** Returns a SortedSetDocValues view of this instance */
+  public SortedSetDocValues iterator(TermsEnum termsEnum) throws IOException {
+    return new Iterator(termsEnum);
+  }
+  
+  // nocommit: make private (only public to enable the hack to cut over gradually)
+  public class Iterator extends SortedSetDocValues {
+    final TermsEnum te;
+    final TermOrdsIterator in = new TermOrdsIterator(); // nocommit: don't wrap this other iterator
+    final int buffer[] = new int[5];
+    int bufferUpto;
+    int bufferLength;
+    
+    Iterator(TermsEnum te) {
+      this.te = te;
+    }
+    
+    @Override
+    public long nextOrd() {
+      while (bufferUpto == bufferLength) {
+        if (bufferLength < buffer.length) {
+          return NO_MORE_ORDS;
+        } else {
+          bufferLength = in.read(buffer);
+          bufferUpto = 0;
+        }
+      }
+      int next = buffer[bufferUpto];
+      bufferUpto++;
+      return next;
+    }
+
+    @Override
+    public void setDocument(int docID) {
+      in.reset(docID);
+      bufferUpto = 0;
+      bufferLength = in.read(buffer);
+    }
+
+    @Override
+    public void lookupOrd(long ord, BytesRef result) {
+      BytesRef ref = null;
+      try {
+        ref = DocTermOrds.this.lookupTerm(te, (int) ord);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+      result.bytes = ref.bytes;
+      result.offset = ref.offset;
+      result.length = ref.length;
+    }
+
+    @Override
+    public long getValueCount() {
+      return numTerms();
+    }
+    
+    // nocommit: just a hack for gradual cutover
+    public DocTermOrds getParent() {
+      return DocTermOrds.this;
+    }
+  }
 }

Modified: lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCache.java?rev=1444835&r1=1444834&r2=1444835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCache.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCache.java Mon Feb 11 15:43:24 2013
@@ -29,6 +29,7 @@ import org.apache.lucene.index.AtomicRea
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DocTermOrds;
 import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.Bits;
@@ -602,7 +603,7 @@ public interface FieldCache {
    * @return a {@link DocTermOrds} instance
    * @throws IOException  If any error occurs.
    */
-  public DocTermOrds getDocTermOrds(AtomicReader reader, String field) throws IOException;
+  public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException;
 
   /**
    * EXPERT: A unique Identifier/Description for each item in the FieldCache. 

Modified: lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1444835&r1=1444834&r2=1444835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java Mon Feb 11 15:43:24 2013
@@ -34,6 +34,7 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.ArrayUtil;
@@ -1303,8 +1304,17 @@ class FieldCacheImpl implements FieldCac
     }
   }
 
-  public DocTermOrds getDocTermOrds(AtomicReader reader, String field) throws IOException {
-    return (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, null), false);
+  public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException {
+    SortedSetDocValues dv = reader.getSortedSetDocValues(field);
+    if (dv != null) {
+      return dv;
+    }
+    
+    // nocommit: actually if they have a SortedDV (either indexed as DV or cached), we should return an impl
+    // over that: it's like a specialized single-value case of this thing...
+    
+    DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, null), false);
+    return dto.iterator(dto.getOrdTermsEnum(reader));
   }
 
   static final class DocTermOrdsCache extends Cache {

Modified: lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java?rev=1444835&r1=1444834&r2=1444835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java Mon Feb 11 15:43:24 2013
@@ -253,44 +253,29 @@ public class TestFieldCache extends Luce
     terms = cache.getTerms(reader, "bogusfield");
 
     // getDocTermOrds
-    DocTermOrds termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
-    TermsEnum termsEnum = termOrds.getOrdTermsEnum(reader);
-    assertSame("Second request to cache return same DocTermOrds", termOrds, cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField"));
-    DocTermOrds.TermOrdsIterator reuse = null;
+    SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
+    // nocommit: test this with reflection or something, that it's really from the same DTO
+    // assertSame("Second request to cache return same DocTermOrds", termOrds, cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField"));
     for (int i = 0; i < NUM_DOCS; i++) {
-      reuse = termOrds.lookup(i, reuse);
-      final int[] buffer = new int[5];
+      termOrds.setDocument(i);
       // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
       List<BytesRef> values = new ArrayList<BytesRef>(new LinkedHashSet<BytesRef>(Arrays.asList(multiValued[i])));
-      for (;;) {
-        int chunk = reuse.read(buffer);
-        if (chunk == 0) {
-          for (int ord = 0; ord < values.size(); ord++) {
-            BytesRef term = values.get(ord);
-            assertNull(String.format(Locale.ROOT, "Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term);
-          }
-          break;
-        }
-
-        for(int idx=0; idx < chunk; idx++) {
-          int key = buffer[idx];
-          termsEnum.seekExact((long) key);
-          String actual = termsEnum.term().utf8ToString();
-          String expected = values.get(idx).utf8ToString();
-          if (!expected.equals(actual)) {
-              reuse = termOrds.lookup(i, reuse);
-              reuse.read(buffer);
-          }
-          assertTrue(String.format(Locale.ROOT, "Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual));
-        }
-
-        if (chunk <= buffer.length) {
+      for (BytesRef v : values) {
+        if (v == null) {
+          // why does this test use null values... instead of an empty list: confusing
           break;
         }
+        long ord = termOrds.nextOrd();
+        assert ord != SortedSetDocValues.NO_MORE_ORDS;
+        BytesRef scratch = new BytesRef();
+        termOrds.lookupOrd(ord, scratch);
+        assertEquals(v, scratch);
       }
+      assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
     }
 
     // test bad field
+    // nocommit: what exactly does this test?
     termOrds = cache.getDocTermOrds(reader, "bogusfield");
 
     FieldCache.DEFAULT.purge(reader);

Modified: lucene/dev/branches/lucene4765/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java?rev=1444835&r1=1444834&r2=1444835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java (original)
+++ lucene/dev/branches/lucene4765/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java Mon Feb 11 15:43:24 2013
@@ -283,7 +283,9 @@ public abstract class TermGroupFacetColl
 
       reuse = null;
       groupFieldTermsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
-      facetFieldDocTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), facetField);
+      // nocommit: cut over
+      DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), facetField);
+      facetFieldDocTermOrds = iterator.getParent();
       facetOrdTermsEnum = facetFieldDocTermOrds.getOrdTermsEnum(context.reader());
       // [facetFieldDocTermOrds.numTerms() + 1] for all possible facet values and docs not containing facet field
       segmentFacetCounts = new int[facetFieldDocTermOrds.numTerms() + 1];

Modified: lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java?rev=1444835&r1=1444834&r2=1444835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java (original)
+++ lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java Mon Feb 11 15:43:24 2013
@@ -100,7 +100,9 @@ abstract class TermsCollector extends Co
 
     @Override
     public void setNextReader(AtomicReaderContext context) throws IOException {
-      docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
+      // nocommit: cut over
+      DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
+      docTermOrds = iterator.getParent();
       docTermsEnum = docTermOrds.getOrdTermsEnum(context.reader());
       reuse = null; // LUCENE-3377 needs to be fixed first then this statement can be removed...
     }

Modified: lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java?rev=1444835&r1=1444834&r2=1444835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java (original)
+++ lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java Mon Feb 11 15:43:24 2013
@@ -234,7 +234,9 @@ abstract class TermsWithScoreCollector e
 
     @Override
     public void setNextReader(AtomicReaderContext context) throws IOException {
-      fromDocTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
+      // nocommit: cut over
+      DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
+      fromDocTermOrds = iterator.getParent();
       docTermsEnum = fromDocTermOrds.getOrdTermsEnum(context.reader());
       reuse = null; // LUCENE-3377 needs to be fixed first then this statement can be removed...
     }

Modified: lucene/dev/branches/lucene4765/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java?rev=1444835&r1=1444834&r2=1444835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java (original)
+++ lucene/dev/branches/lucene4765/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java Mon Feb 11 15:43:24 2013
@@ -505,7 +505,9 @@ public class TestJoinUtil extends Lucene
 
           @Override
           public void setNextReader(AtomicReaderContext context) throws IOException {
-            docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), fromField);
+            // nocommit: cut over
+            DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), fromField);
+            docTermOrds = iterator.getParent();
             docTermsEnum = docTermOrds.getOrdTermsEnum(context.reader());
             reuse = null;
           }
@@ -629,7 +631,8 @@ public class TestJoinUtil extends Lucene
             @Override
             public void setNextReader(AtomicReaderContext context) throws IOException {
               docBase = context.docBase;
-              docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), toField);
+              DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), toField);
+              docTermOrds = iterator.getParent();
               docTermsEnum = docTermOrds.getOrdTermsEnum(context.reader());
               reuse = null;
             }