You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by cu...@apache.org on 2004/02/19 19:28:59 UTC
cvs commit: jakarta-lucene/src/java/org/apache/lucene/index MultiReader.java FilterIndexReader.java IndexReader.java SegmentReader.java SegmentsReader.java
cutting 2004/02/19 10:28:59
Modified: . CHANGES.txt
src/java/org/apache/lucene/index FilterIndexReader.java
IndexReader.java SegmentReader.java
Added: src/java/org/apache/lucene/index MultiReader.java
Removed: src/java/org/apache/lucene/index SegmentsReader.java
Log:
Added MultiReader, an IndexReader that combines multiple other IndexReaders.
Revision Changes Path
1.73 +3 -1 jakarta-lucene/CHANGES.txt
Index: CHANGES.txt
===================================================================
RCS file: /home/cvs/jakarta-lucene/CHANGES.txt,v
retrieving revision 1.72
retrieving revision 1.73
diff -u -r1.72 -r1.73
--- CHANGES.txt 17 Feb 2004 19:00:31 -0000 1.72
+++ CHANGES.txt 19 Feb 2004 18:28:59 -0000 1.73
@@ -51,6 +51,8 @@
one that delegates through that of the Searcher. (Julien Nioche
via Cutting)
+ 9. Added MultiReader, an IndexReader that combines multiple other
+ IndexReaders. (Cutting)
1.3 final
1.6 +3 -0 jakarta-lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
Index: FilterIndexReader.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/FilterIndexReader.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- FilterIndexReader.java 15 Dec 2003 23:04:42 -0000 1.5
+++ FilterIndexReader.java 19 Feb 2004 18:28:59 -0000 1.6
@@ -128,6 +128,9 @@
public void undeleteAll() throws IOException { in.undeleteAll(); }
public byte[] norms(String f) throws IOException { return in.norms(f); }
+ public void norms(String f, byte[] bytes, int offset) throws IOException {
+ in.norms(f, bytes, offset);
+ }
public void setNorm(int d, String f, byte b) throws IOException {
in.setNorm(d,f,b);
}
1.25 +11 -3 jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java
Index: IndexReader.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java,v
retrieving revision 1.24
retrieving revision 1.25
diff -u -r1.24 -r1.25
--- IndexReader.java 15 Dec 2003 23:04:42 -0000 1.24
+++ IndexReader.java 19 Feb 2004 18:28:59 -0000 1.25
@@ -117,10 +117,10 @@
if (infos.size() == 1) { // index is optimized
return new SegmentReader(infos, infos.info(0), true);
} else {
- SegmentReader[] readers = new SegmentReader[infos.size()];
+ IndexReader[] readers = new IndexReader[infos.size()];
for (int i = 0; i < infos.size(); i++)
readers[i] = new SegmentReader(infos, infos.info(i), i==infos.size()-1);
- return new SegmentsReader(infos, directory, readers);
+ return new MultiReader(directory, readers);
}
}
}.run();
@@ -271,6 +271,14 @@
* @see Field#setBoost(float)
*/
public abstract byte[] norms(String field) throws IOException;
+
+ /** Reads the byte-encoded normalization factor for the named field of every
+ * document. This is used by the search code to score documents.
+ *
+ * @see Field#setBoost(float)
+ */
+ public abstract void norms(String field, byte[] bytes, int offset)
+ throws IOException;
/** Expert: Resets the normalization factor for the named field of the named
* document. The norm represents the product of the field's {@link
1.18 +2 -2 jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java
Index: SegmentReader.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -r1.17 -r1.18
--- SegmentReader.java 15 Dec 2003 23:04:42 -0000 1.17
+++ SegmentReader.java 19 Feb 2004 18:28:59 -0000 1.18
@@ -357,7 +357,7 @@
}
/** Read norms into a pre-allocated array. */
- synchronized void norms(String field, byte[] bytes, int offset)
+ public synchronized void norms(String field, byte[] bytes, int offset)
throws IOException {
Norm norm = (Norm)norms.get(field);
1.1 jakarta-lucene/src/java/org/apache/lucene/index/MultiReader.java
Index: MultiReader.java
===================================================================
package org.apache.lucene.index;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001, 2002, 2003 The Apache Software Foundation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
/** An IndexReader which reads multiple indexes, appending their content.
 *
 * <p>The document numbers of the sub-readers are concatenated: the documents
 * of <code>readers[i]</code> are exposed by this reader under the numbers
 * <code>starts[i]</code> (inclusive) to <code>starts[i+1]</code> (exclusive).
 *
 * @version $Id: MultiReader.java,v 1.1 2004/02/19 18:28:59 cutting Exp $
 */
public class MultiReader extends IndexReader {
  private IndexReader[] readers;
  private int[] starts;                           // 1st docno for each segment
  private Hashtable normsCache = new Hashtable(); // field name -> merged norms
  private int maxDoc = 0;
  private int numDocs = -1;                       // -1 == not yet computed
  private boolean hasDeletions = false;

  /** Construct reading the named set of readers.  The directory of the first
   * reader is used as this reader's directory, or <code>null</code> when the
   * array is empty.
   */
  public MultiReader(IndexReader[] readers) throws IOException {
    this(readers.length == 0 ? null : readers[0].directory(), readers);
  }

  /** Construct reading the named set of readers.
   *
   * @param directory the directory passed on to the IndexReader constructor
   * @param readers the readers whose content is appended, in order
   */
  public MultiReader(Directory directory, IndexReader[] readers)
    throws IOException {
    super(directory);
    this.readers = readers;
    starts = new int[readers.length + 1];         // build starts array
    for (int i = 0; i < readers.length; i++) {
      starts[i] = maxDoc;
      maxDoc += readers[i].maxDoc();              // compute maxDocs

      if (readers[i].hasDeletions())
        hasDeletions = true;
    }
    starts[readers.length] = maxDoc;              // sentinel: one past last doc
  }

  /** Returns the sum of the sub-readers' document counts.  The value is
   * cached until a deletion or undeletion invalidates it.
   */
  public synchronized int numDocs() {
    if (numDocs == -1) {                          // check cache
      int n = 0;                                  // cache miss--recompute
      for (int i = 0; i < readers.length; i++)
        n += readers[i].numDocs();                // sum from readers
      numDocs = n;
    }
    return numDocs;
  }

  /** Returns the sum of the sub-readers' maxDoc() values, as computed at
   * construction time.
   */
  public int maxDoc() {
    return maxDoc;
  }

  /** Returns document <code>n</code>, fetched from the sub-reader that
   * covers that document number.
   */
  public Document document(int n) throws IOException {
    int i = readerIndex(n);                       // find segment num
    return readers[i].document(n - starts[i]);    // dispatch to segment reader
  }

  /** Returns whether document <code>n</code> is deleted in the sub-reader
   * that covers that document number.
   */
  public boolean isDeleted(int n) {
    int i = readerIndex(n);                       // find segment num
    return readers[i].isDeleted(n - starts[i]);   // dispatch to segment reader
  }

  /** Returns true if any sub-reader had deletions at construction time or a
   * document has been deleted through this reader since.
   */
  public boolean hasDeletions() { return hasDeletions; }

  /** Deletes document <code>n</code> in the sub-reader that covers it. */
  protected synchronized void doDelete(int n) throws IOException {
    numDocs = -1;                                 // invalidate cache
    int i = readerIndex(n);                       // find segment num
    readers[i].doDelete(n - starts[i]);           // dispatch to segment reader
    hasDeletions = true;
  }

  /** Undeletes all documents in every sub-reader.
   *
   * <p>Synchronized, like {@link #numDocs()} and {@link #doDelete(int)},
   * because it resets the cached document count.
   */
  public synchronized void undeleteAll() throws IOException {
    // Invalidate first so that a failure part-way through the loop cannot
    // leave a stale (too small) count behind.
    numDocs = -1;
    for (int i = 0; i < readers.length; i++)
      readers[i].undeleteAll();
    hasDeletions = false;
  }

  /** Returns the index of the sub-reader that covers document <code>n</code>,
   * found by binary search over the starts array.
   */
  private int readerIndex(int n) {                // find reader for doc n:
    int lo = 0;                                   // search starts array
    int hi = readers.length - 1;                  // for first element less

    while (hi >= lo) {
      int mid = (lo + hi) >>> 1;                  // unsigned shift: no overflow
      int midValue = starts[mid];
      if (n < midValue)
        hi = mid - 1;
      else if (n > midValue)
        lo = mid + 1;
      else {                                      // found a match
        // Empty readers share a start value with their successor; the
        // document belongs to the last reader with that start.
        while (mid+1 < readers.length && starts[mid+1] == midValue) {
          mid++;                                  // scan to last match
        }
        return mid;
      }
    }
    return hi;                                    // last reader starting below n
  }

  /** Returns the norms of the named field for all documents of all
   * sub-readers, concatenated in reader order.  The merged array is cached
   * per field until {@link #setNorm(int,String,byte)} is called for it.
   */
  public synchronized byte[] norms(String field) throws IOException {
    byte[] bytes = (byte[])normsCache.get(field);
    if (bytes != null)
      return bytes;                               // cache hit

    bytes = new byte[maxDoc()];
    for (int i = 0; i < readers.length; i++)
      readers[i].norms(field, bytes, starts[i]);
    normsCache.put(field, bytes);                 // update cache
    return bytes;
  }

  /** Copies the norms of the named field for all documents into
   * <code>result</code>, starting at <code>offset</code>.
   *
   * @param field the field whose norms are read
   * @param result the pre-allocated destination array
   * @param offset the index in <code>result</code> of this reader's document 0
   */
  public synchronized void norms(String field, byte[] result, int offset)
    throws IOException {
    byte[] bytes = (byte[])normsCache.get(field);
    if (bytes != null) {                          // cache hit
      System.arraycopy(bytes, 0, result, offset, maxDoc());
      return;                                     // nothing left to read
    }

    for (int i = 0; i < readers.length; i++)      // read from segments
      readers[i].norms(field, result, offset + starts[i]);
  }

  /** Sets the norm of the named field for document <code>n</code> in the
   * sub-reader that covers it, and drops the cached merged norms of that
   * field.
   */
  public synchronized void setNorm(int n, String field, byte value)
    throws IOException {
    normsCache.remove(field);                     // clear cache
    int i = readerIndex(n);                       // find segment num
    readers[i].setNorm(n-starts[i], field, value); // dispatch
  }

  /** Returns an enumeration over the terms of all sub-readers. */
  public TermEnum terms() throws IOException {
    return new MultiTermEnum(readers, starts, null);
  }

  /** Returns an enumeration over the terms of all sub-readers, created from
   * each sub-reader's <code>terms(term)</code> enumeration.
   */
  public TermEnum terms(Term term) throws IOException {
    return new MultiTermEnum(readers, starts, term);
  }

  /** Returns the sum of the term's document frequencies in all sub-readers. */
  public int docFreq(Term t) throws IOException {
    int total = 0;                                // sum freqs in segments
    for (int i = 0; i < readers.length; i++)
      total += readers[i].docFreq(t);
    return total;
  }

  /** Returns an unpositioned TermDocs spanning all sub-readers. */
  public TermDocs termDocs() throws IOException {
    return new MultiTermDocs(readers, starts);
  }

  /** Returns an unpositioned TermPositions spanning all sub-readers. */
  public TermPositions termPositions() throws IOException {
    return new MultiTermPositions(readers, starts);
  }

  /** Closes every sub-reader, in order. */
  protected synchronized void doClose() throws IOException {
    for (int i = 0; i < readers.length; i++)
      readers[i].close();
  }

  /**
   * Returns the union of the field names of all sub-readers.
   *
   * @see IndexReader#getFieldNames()
   */
  public Collection getFieldNames() throws IOException {
    // maintain a unique set of field names
    Set fieldSet = new HashSet();
    for (int i = 0; i < readers.length; i++) {
      IndexReader reader = readers[i];
      Collection names = reader.getFieldNames();
      fieldSet.addAll(names);
    }
    return fieldSet;
  }

  /**
   * Returns the union of the sub-readers' field names for the given
   * <code>indexed</code> flag.
   *
   * @see IndexReader#getFieldNames(boolean)
   */
  public Collection getFieldNames(boolean indexed) throws IOException {
    // maintain a unique set of field names
    Set fieldSet = new HashSet();
    for (int i = 0; i < readers.length; i++) {
      IndexReader reader = readers[i];
      Collection names = reader.getFieldNames(indexed);
      fieldSet.addAll(names);
    }
    return fieldSet;
  }
}
/** A TermEnum that merges the term enumerations of several readers.
 *
 * <p>One SegmentMergeInfo per reader is kept in a SegmentMergeQueue; each
 * call to {@link #next()} consumes every queue entry positioned on the same
 * term as the queue's top entry and sums their document frequencies.
 */
class MultiTermEnum extends TermEnum {
  private SegmentMergeQueue queue;

  private Term term;       // current term, null before first next() or at end
  private int docFreq;     // summed document frequency of the current term

  /** Builds the merge queue from the given readers.
   *
   * @param readers the readers whose terms are merged
   * @param starts the first document number of each reader
   * @param t when non-null, each reader is asked for <code>terms(t)</code>
   *          and the enumeration is advanced to its first merged term; when
   *          null, enumeration covers all terms and is left unpositioned
   */
  public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
    throws IOException {
    final boolean seeking = (t != null);
    queue = new SegmentMergeQueue(readers.length);

    for (int slot = 0; slot < readers.length; slot++) {
      IndexReader reader = readers[slot];
      SegmentTermEnum segmentEnum =
        (SegmentTermEnum) (seeking ? reader.terms(t) : reader.terms());
      SegmentMergeInfo info =
        new SegmentMergeInfo(starts[slot], segmentEnum, reader);

      // A seeked enumeration is usable if it already points at a term; an
      // unseeked one must be advanced once to find out.
      boolean positioned;
      if (seeking) {
        positioned = segmentEnum.term() != null;
      } else {
        positioned = info.next();
      }

      if (positioned) {
        queue.put(info);                          // initialize queue
      } else {
        info.close();
      }
    }

    if (seeking && queue.size() > 0) {
      next();
    }
  }

  /** Advances to the next merged term.
   *
   * @return false, with the current term cleared, when the queue is empty
   */
  public boolean next() throws IOException {
    SegmentMergeInfo head = (SegmentMergeInfo) queue.top();
    if (head == null) {
      term = null;
      return false;
    }

    term = head.term;
    docFreq = 0;

    // Drain every queue entry that sits on the same term as the first one.
    for (; head != null && term.compareTo(head.term) == 0;
         head = (SegmentMergeInfo) queue.top()) {
      queue.pop();
      docFreq += head.termEnum.docFreq();         // increment freq
      if (head.next()) {
        queue.put(head);                          // restore queue
      } else {
        head.close();                             // done with a segment
      }
    }
    return true;
  }

  /** Returns the current merged term. */
  public Term term() {
    return term;
  }

  /** Returns the document frequency of the current term, summed over the
   * readers that contained it.
   */
  public int docFreq() {
    return docFreq;
  }

  /** Closes the underlying merge queue. */
  public void close() throws IOException {
    queue.close();
  }
}
/** A TermDocs that walks the postings of one term across several readers,
 * reader by reader, shifting each reader's document numbers by that reader's
 * start offset.
 */
class MultiTermDocs implements TermDocs {
  protected IndexReader[] readers;
  protected int[] starts;
  protected Term term;

  protected int base = 0;                         // doc offset of current reader
  protected int pointer = 0;                      // index of next reader to open

  private SegmentTermDocs[] segTermDocs;          // lazily created, one per reader
  protected SegmentTermDocs current;              // reader being consumed, or null

  /** Creates an unpositioned instance over the given readers.
   *
   * @param r the readers to iterate
   * @param s the first document number of each reader
   */
  public MultiTermDocs(IndexReader[] r, int[] s) {
    readers = r;
    starts = s;
    segTermDocs = new SegmentTermDocs[r.length];
  }

  /** Returns the current document number, offset by the current reader's
   * start.
   */
  public int doc() {
    return base + current.doc;
  }

  /** Returns the frequency of the term in the current document. */
  public int freq() {
    return current.freq;
  }

  /** Positions this instance on the given term and rewinds to the first
   * reader.
   */
  public void seek(Term term) {
    this.term = term;
    this.base = 0;
    this.pointer = 0;
    this.current = null;
  }

  /** Positions this instance on the current term of the enumeration. */
  public void seek(TermEnum termEnum) throws IOException {
    seek(termEnum.term());
  }

  /** Advances to the next document, moving on to the following reader when
   * the current one is exhausted.
   *
   * @return false once every reader has been exhausted
   */
  public boolean next() throws IOException {
    while (true) {
      if (current != null && current.next()) {
        return true;
      }
      if (pointer >= readers.length) {
        return false;
      }
      base = starts[pointer];
      current = termDocs(pointer++);
    }
  }

  /** Optimized implementation.  Fills the arrays from a single reader per
   * call and returns the number of entries read, or 0 when all readers are
   * exhausted.
   */
  public int read(final int[] docs, final int[] freqs)
    throws IOException {
    for (;;) {
      while (current == null) {
        if (pointer >= readers.length) {
          return 0;
        }
        base = starts[pointer];                   // try next segment
        current = termDocs(pointer++);
      }

      int count = current.read(docs, freqs);
      if (count != 0) {                           // got some
        final int shift = base;                   // adjust doc numbers
        for (int k = 0; k < count; k++) {
          docs[k] += shift;
        }
        return count;
      }
      current = null;                             // none left in segment
    }
  }

  /** As yet unoptimized implementation: steps forward with next() until a
   * document number of at least <code>target</code> is reached.
   */
  public boolean skipTo(int target) throws IOException {
    while (next()) {
      if (doc() >= target) {
        return true;
      }
    }
    return false;
  }

  /** Returns the (cached) term docs of reader <code>i</code>, seeked to the
   * current term, or null when no term has been set.
   */
  private SegmentTermDocs termDocs(int i) throws IOException {
    if (term == null) {
      return null;
    }
    SegmentTermDocs docsForReader = segTermDocs[i];
    if (docsForReader == null) {
      docsForReader = termDocs(readers[i]);
      segTermDocs[i] = docsForReader;
    }
    docsForReader.seek(term);
    return docsForReader;
  }

  /** Obtains a fresh term docs object from the given reader. */
  protected SegmentTermDocs termDocs(IndexReader reader)
    throws IOException {
    TermDocs docs = reader.termDocs();
    return (SegmentTermDocs) docs;
  }

  /** Closes every per-reader term docs object that was created. */
  public void close() throws IOException {
    for (int i = 0; i < segTermDocs.length; i++) {
      SegmentTermDocs opened = segTermDocs[i];
      if (opened != null) {
        opened.close();
      }
    }
  }
}
/** A MultiTermDocs that obtains term positions from each reader and exposes
 * the positions of the current document.
 */
class MultiTermPositions extends MultiTermDocs implements TermPositions {

  /** Creates an unpositioned instance over the given readers.
   *
   * @param r the readers to iterate
   * @param s the first document number of each reader
   */
  public MultiTermPositions(IndexReader[] r, int[] s) {
    super(r, s);
  }

  /** Obtains a fresh term positions object from the given reader. */
  protected SegmentTermDocs termDocs(IndexReader reader)
    throws IOException {
    TermPositions positions = reader.termPositions();
    return (SegmentTermDocs) positions;
  }

  /** Returns the next position of the term in the current document, as
   * reported by the reader currently being consumed.
   */
  public int nextPosition() throws IOException {
    SegmentTermPositions segmentPositions = (SegmentTermPositions) current;
    return segmentPositions.nextPosition();
  }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org