You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2010/02/17 11:46:47 UTC
svn commit: r910906 - in /lucene/java/branches/flex_1458/src:
java/org/apache/lucene/index/codecs/standard/ test/org/apache/lucene/index/
Author: mikemccand
Date: Wed Feb 17 10:46:47 2010
New Revision: 910906
URL: http://svn.apache.org/viewvc?rev=910906&view=rev
Log:
flex: fixed int overflow cases in terms dict; differentiate loaded vs on-disk index terms in StandardTermsIndexReader
Modified:
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexReader.java
lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestIndexWriter.java
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java?rev=910906&r1=910905&r2=910906&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java Wed Feb 17 10:46:47 2010
@@ -222,8 +222,12 @@
}
@Override
- public boolean isIndexTerm(long ord, int docFreq) {
- return ord % totalIndexInterval == 0;
+ public boolean isIndexTerm(long ord, int docFreq, boolean onlyLoaded) {
+ if (onlyLoaded) {
+ return ord % totalIndexInterval == 0;
+ } else {
+ return ord % indexInterval == 0;
+ }
}
@Override
@@ -433,14 +437,12 @@
int hi = fileOffset.length - 1;
while (hi >= lo) {
- int mid = (lo + hi) >> 1;
+ int mid = (lo + hi) >>> 1;
final long loc = blockPointer[mid];
result.term.bytes = blocks[(int) (loc >> BYTE_BLOCK_SHIFT)];
result.term.offset = (int) (loc & BYTE_BLOCK_MASK);
- //System.out.println(" cycle mid=" + mid + " bytes=" + result.term.bytes + " offset=" + result.term.offset);
result.term.length = termLength[mid];
- //System.out.println(" term=" + result.term);
int delta = termComp.compare(term, result.term);
if (delta < 0) {
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java?rev=910906&r1=910905&r2=910906&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java Wed Feb 17 10:46:47 2010
@@ -67,7 +67,7 @@
// Comparator that orders our terms
private final BytesRef.Comparator termComp;
- // Caches the most recently looked-up Terms:
+ // Caches the most recently looked-up field + terms:
private final Cache<FieldAndTerm,TermState> termsCache;
// Reads the terms index
@@ -359,8 +359,8 @@
}
}
- // Useed only for assert:
- final int startOrd;
+ // Used only for assert:
+ final long startOrd;
if (doSeek) {
@@ -383,10 +383,10 @@
// special case it:
bytesReader.reset(indexResult.term);
- state.ord = (int) indexResult.position-1;
+ state.ord = indexResult.position-1;
assert state.ord >= -1: "ord=" + state.ord;
- startOrd = (int) indexResult.position;
+ startOrd = indexResult.position;
if (Codec.DEBUG) {
Codec.debug(" set ord=" + state.ord);
@@ -434,7 +434,7 @@
// term we are looking for. So, we should never
// cross another index term (besides the first
// one) while we are scanning:
- assert state.ord == startOrd || !indexReader.isIndexTerm(state.ord, state.docFreq);
+ assert state.ord == startOrd || !indexReader.isIndexTerm(state.ord, state.docFreq, true);
}
if (Codec.DEBUG) {
@@ -529,7 +529,7 @@
// wrong offset. make a test...
postingsReader.readTerm(in,
fieldInfo, state,
- indexReader.isIndexTerm(1+state.ord, state.docFreq));
+ indexReader.isIndexTerm(1+state.ord, state.docFreq, false));
state.ord++;
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexReader.java?rev=910906&r1=910905&r2=910906&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexReader.java Wed Feb 17 10:46:47 2010
@@ -58,7 +58,7 @@
/** Call this sequentially for each term encountered,
* after calling {@link #getIndexOffset}. */
- public abstract boolean isIndexTerm(long ord, int docFreq) throws IOException;
+ public abstract boolean isIndexTerm(long ord, int docFreq, boolean onlyLoaded) throws IOException;
/** Finds the next index term, after the specified
* ord. Returns true if one exists. */
Modified: lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=910906&r1=910905&r2=910906&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestIndexWriter.java Wed Feb 17 10:46:47 2010
@@ -4817,4 +4817,31 @@
writer.close();
dir.close();
}
+
+ public void testIndexDivisor() throws Exception {
+ Directory dir = new MockRAMDirectory();
+ IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
+ StringBuilder s = new StringBuilder();
+ // must be > 256
+ for(int i=0;i<300;i++) {
+ s.append(' ').append(""+i);
+ }
+ Document d = new Document();
+ Field f = new Field("field", s.toString(), Field.Store.NO, Field.Index.ANALYZED);
+ d.add(f);
+ w.addDocument(d);
+ IndexReader r = w.getReader(2).getSequentialSubReaders()[0];
+ TermsEnum t = r.fields().terms("field").iterator();
+ int count = 0;
+ while(t.next() != null) {
+ final DocsEnum docs = t.docs(null, null);
+ assertEquals(0, docs.nextDoc());
+ assertEquals(DocsEnum.NO_MORE_DOCS, docs.nextDoc());
+ count++;
+ }
+ assertEquals(300, count);
+ r.close();
+ w.close();
+ dir.close();
+ }
}