You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/02/11 16:43:25 UTC
svn commit: r1444835 - in /lucene/dev/branches/lucene4765/lucene:
core/src/java/org/apache/lucene/index/
core/src/java/org/apache/lucene/search/
core/src/test/org/apache/lucene/search/
grouping/src/java/org/apache/lucene/search/grouping/term/ join/src/...
Author: rmuir
Date: Mon Feb 11 15:43:24 2013
New Revision: 1444835
URL: http://svn.apache.org/r1444835
Log:
add hack to begin incremental cutover
Modified:
lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCache.java
lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java
lucene/dev/branches/lucene4765/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java
lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java
lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java
lucene/dev/branches/lucene4765/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java
Modified: lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java?rev=1444835&r1=1444834&r2=1444835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java Mon Feb 11 15:43:24 2013
@@ -847,4 +847,67 @@ public class DocTermOrds {
termsEnum.seekExact(ord);
return termsEnum.term();
}
+
+ /** Returns a {@link SortedSetDocValues} view of this instance; {@code termsEnum} is handed to the view and used by its {@code lookupOrd} to resolve ords to terms. */
+ public SortedSetDocValues iterator(TermsEnum termsEnum) throws IOException {
+ return new Iterator(termsEnum);
+ }
+
+ // nocommit: make private (just public to enable hack to cutover gradually)
+ public class Iterator extends SortedSetDocValues {
+ final TermsEnum te; // used by lookupOrd to resolve an ord to its term
+ final TermOrdsIterator in = new TermOrdsIterator(); // nocommit: don't wrap this other iterator
+ final int buffer[] = new int[5]; // ords of the current document are pulled from 'in' in chunks of up to buffer.length
+ int bufferUpto; // index of the next unread entry in buffer
+ int bufferLength; // number of valid entries in buffer, as returned by the last in.read(buffer)
+
+ Iterator(TermsEnum te) {
+ this.te = te;
+ }
+
+ @Override
+ public long nextOrd() {
+ while (bufferUpto == bufferLength) { // current chunk is used up (also true before the first setDocument, when both are 0)
+ if (bufferLength < buffer.length) { // the previous read did not fill the buffer: treated as the end of this document's ords
+ return NO_MORE_ORDS;
+ } else { // the previous read filled the buffer, so try to read another chunk
+ bufferLength = in.read(buffer);
+ bufferUpto = 0;
+ }
+ }
+ int next = buffer[bufferUpto];
+ bufferUpto++;
+ return next; // int ord is widened to the long return type
+ }
+
+ @Override
+ public void setDocument(int docID) {
+ in.reset(docID);
+ bufferUpto = 0;
+ bufferLength = in.read(buffer); // prefetch the first chunk of ords for docID
+ }
+
+ @Override
+ public void lookupOrd(long ord, BytesRef result) {
+ BytesRef ref = null;
+ try {
+ ref = DocTermOrds.this.lookupTerm(te, (int) ord); // ord is narrowed to int before the lookup
+ } catch (IOException e) {
+ throw new RuntimeException(e); // lookupOrd declares no checked exception, so the IOException is wrapped
+ }
+ result.bytes = ref.bytes; // result points at ref's byte array; no copy is made
+ result.offset = ref.offset;
+ result.length = ref.length;
+ }
+
+ @Override
+ public long getValueCount() {
+ return numTerms();
+ }
+
+ // nocommit: just a hack for gradual cutover
+ public DocTermOrds getParent() {
+ return DocTermOrds.this; // exposes the enclosing instance so callers can keep using the old DocTermOrds API
+ }
+ }
}
Modified: lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCache.java?rev=1444835&r1=1444834&r2=1444835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCache.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCache.java Mon Feb 11 15:43:24 2013
@@ -29,6 +29,7 @@ import org.apache.lucene.index.AtomicRea
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
@@ -602,7 +603,7 @@ public interface FieldCache {
* @return a {@link DocTermOrds} instance
* @throws IOException If any error occurs.
*/
- public DocTermOrds getDocTermOrds(AtomicReader reader, String field) throws IOException;
+ public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException;
/**
* EXPERT: A unique Identifier/Description for each item in the FieldCache.
Modified: lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1444835&r1=1444834&r2=1444835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java Mon Feb 11 15:43:24 2013
@@ -34,6 +34,7 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.ArrayUtil;
@@ -1303,8 +1304,17 @@ class FieldCacheImpl implements FieldCac
}
}
- public DocTermOrds getDocTermOrds(AtomicReader reader, String field) throws IOException {
- return (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, null), false);
+ public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException {
+ SortedSetDocValues dv = reader.getSortedSetDocValues(field); // prefer values supplied by the reader itself
+ if (dv != null) {
+ return dv;
+ }
+
+ // nocommit: actually if they have a SortedDV (either indexed as DV or cached), we should return an impl
+ // over that: it's like a specialized single-value case of this thing...
+
+ DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, null), false); // otherwise fall back to the DocTermOrds cache entry for this reader/field
+ return dto.iterator(dto.getOrdTermsEnum(reader)); // builds a new Iterator view on every call, so repeated calls do not return the same object
+ }
static final class DocTermOrdsCache extends Cache {
Modified: lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java?rev=1444835&r1=1444834&r2=1444835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java Mon Feb 11 15:43:24 2013
@@ -253,44 +253,29 @@ public class TestFieldCache extends Luce
terms = cache.getTerms(reader, "bogusfield");
// getDocTermOrds
- DocTermOrds termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
- TermsEnum termsEnum = termOrds.getOrdTermsEnum(reader);
- assertSame("Second request to cache return same DocTermOrds", termOrds, cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField"));
- DocTermOrds.TermOrdsIterator reuse = null;
+ SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
+ // nocommit: test this with reflection or something, that its really from the same DTO
+ // assertSame("Second request to cache return same DocTermOrds", termOrds, cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField"));
for (int i = 0; i < NUM_DOCS; i++) {
- reuse = termOrds.lookup(i, reuse);
- final int[] buffer = new int[5];
+ termOrds.setDocument(i);
// This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
List<BytesRef> values = new ArrayList<BytesRef>(new LinkedHashSet<BytesRef>(Arrays.asList(multiValued[i])));
- for (;;) {
- int chunk = reuse.read(buffer);
- if (chunk == 0) {
- for (int ord = 0; ord < values.size(); ord++) {
- BytesRef term = values.get(ord);
- assertNull(String.format(Locale.ROOT, "Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term);
- }
- break;
- }
-
- for(int idx=0; idx < chunk; idx++) {
- int key = buffer[idx];
- termsEnum.seekExact((long) key);
- String actual = termsEnum.term().utf8ToString();
- String expected = values.get(idx).utf8ToString();
- if (!expected.equals(actual)) {
- reuse = termOrds.lookup(i, reuse);
- reuse.read(buffer);
- }
- assertTrue(String.format(Locale.ROOT, "Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual));
- }
-
- if (chunk <= buffer.length) {
+ for (BytesRef v : values) {
+ if (v == null) {
+ // why does this test use null values... instead of an empty list: confusing
break;
}
+ long ord = termOrds.nextOrd();
+ assert ord != SortedSetDocValues.NO_MORE_ORDS;
+ BytesRef scratch = new BytesRef();
+ termOrds.lookupOrd(ord, scratch);
+ assertEquals(v, scratch);
}
+ assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
}
// test bad field
+ // nocommit: what exactly does this test?
termOrds = cache.getDocTermOrds(reader, "bogusfield");
FieldCache.DEFAULT.purge(reader);
Modified: lucene/dev/branches/lucene4765/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java?rev=1444835&r1=1444834&r2=1444835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java (original)
+++ lucene/dev/branches/lucene4765/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java Mon Feb 11 15:43:24 2013
@@ -283,7 +283,9 @@ public abstract class TermGroupFacetColl
reuse = null;
groupFieldTermsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
- facetFieldDocTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), facetField);
+ // nocommit: cut over
+ DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), facetField);
+ facetFieldDocTermOrds = iterator.getParent();
facetOrdTermsEnum = facetFieldDocTermOrds.getOrdTermsEnum(context.reader());
// [facetFieldDocTermOrds.numTerms() + 1] for all possible facet values and docs not containing facet field
segmentFacetCounts = new int[facetFieldDocTermOrds.numTerms() + 1];
Modified: lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java?rev=1444835&r1=1444834&r2=1444835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java (original)
+++ lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java Mon Feb 11 15:43:24 2013
@@ -100,7 +100,9 @@ abstract class TermsCollector extends Co
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
- docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
+ // nocommit: cut over. NOTE(review): the cast below relies on getDocTermOrds returning its DocTermOrds-backed view; if the reader supplies its own SortedSetDocValues for the field, the cast presumably throws ClassCastException - confirm
+ DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
+ docTermOrds = iterator.getParent(); // unwrap the underlying DocTermOrds so the old API calls below keep working
docTermsEnum = docTermOrds.getOrdTermsEnum(context.reader());
reuse = null; // LUCENE-3377 needs to be fixed first then this statement can be removed...
}
Modified: lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java?rev=1444835&r1=1444834&r2=1444835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java (original)
+++ lucene/dev/branches/lucene4765/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java Mon Feb 11 15:43:24 2013
@@ -234,7 +234,9 @@ abstract class TermsWithScoreCollector e
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
- fromDocTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
+ // nocommit: cut over. NOTE(review): the cast below relies on getDocTermOrds returning its DocTermOrds-backed view; if the reader supplies its own SortedSetDocValues for the field, the cast presumably throws ClassCastException - confirm
+ DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
+ fromDocTermOrds = iterator.getParent(); // unwrap the underlying DocTermOrds so the old API calls below keep working
docTermsEnum = fromDocTermOrds.getOrdTermsEnum(context.reader());
reuse = null; // LUCENE-3377 needs to be fixed first then this statement can be removed...
}
Modified: lucene/dev/branches/lucene4765/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java?rev=1444835&r1=1444834&r2=1444835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java (original)
+++ lucene/dev/branches/lucene4765/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java Mon Feb 11 15:43:24 2013
@@ -505,7 +505,9 @@ public class TestJoinUtil extends Lucene
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
- docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), fromField);
+ // nocommit: cut over. NOTE(review): the cast below relies on getDocTermOrds returning its DocTermOrds-backed view; if the reader supplies its own SortedSetDocValues for the field, the cast presumably throws ClassCastException - confirm
+ DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), fromField);
+ docTermOrds = iterator.getParent(); // unwrap the underlying DocTermOrds so the old API calls below keep working
docTermsEnum = docTermOrds.getOrdTermsEnum(context.reader());
reuse = null;
}
@@ -629,7 +631,8 @@ public class TestJoinUtil extends Lucene
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
docBase = context.docBase;
- docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), toField);
+ DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), toField); // NOTE(review): same cutover hack as the other collectors; the cast relies on the DocTermOrds-backed view being returned - confirm
+ docTermOrds = iterator.getParent(); // unwrap the underlying DocTermOrds so the old API calls below keep working
docTermsEnum = docTermOrds.getOrdTermsEnum(context.reader());
reuse = null;
}