You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by cu...@apache.org on 2004/02/19 19:28:59 UTC

cvs commit: jakarta-lucene/src/java/org/apache/lucene/index MultiReader.java FilterIndexReader.java IndexReader.java SegmentReader.java SegmentsReader.java

cutting     2004/02/19 10:28:59

  Modified:    .        CHANGES.txt
               src/java/org/apache/lucene/index FilterIndexReader.java
                        IndexReader.java SegmentReader.java
  Added:       src/java/org/apache/lucene/index MultiReader.java
  Removed:     src/java/org/apache/lucene/index SegmentsReader.java
  Log:
  Added MultiReader, an IndexReader that combines multiple other IndexReaders.
  
  Revision  Changes    Path
  1.73      +3 -1      jakarta-lucene/CHANGES.txt
  
  Index: CHANGES.txt
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/CHANGES.txt,v
  retrieving revision 1.72
  retrieving revision 1.73
  diff -u -r1.72 -r1.73
  --- CHANGES.txt	17 Feb 2004 19:00:31 -0000	1.72
  +++ CHANGES.txt	19 Feb 2004 18:28:59 -0000	1.73
  @@ -51,6 +51,8 @@
       one that delegates through that of the Searcher.  (Julien Nioche
       via Cutting)
   
  + 9. Added MultiReader, an IndexReader that combines multiple other
  +    IndexReaders.  (Cutting)
   
   1.3 final
   
  
  
  
  1.6       +3 -0      jakarta-lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
  
  Index: FilterIndexReader.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/FilterIndexReader.java,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- FilterIndexReader.java	15 Dec 2003 23:04:42 -0000	1.5
  +++ FilterIndexReader.java	19 Feb 2004 18:28:59 -0000	1.6
  @@ -128,6 +128,9 @@
     public void undeleteAll() throws IOException { in.undeleteAll(); }
   
     public byte[] norms(String f) throws IOException { return in.norms(f); }
  +  public void norms(String f, byte[] bytes, int offset) throws IOException {
  +    in.norms(f, bytes, offset);
  +  }
     public void setNorm(int d, String f, byte b) throws IOException {
       in.setNorm(d,f,b);
     }
  
  
  
  1.25      +11 -3     jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java
  
  Index: IndexReader.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java,v
  retrieving revision 1.24
  retrieving revision 1.25
  diff -u -r1.24 -r1.25
  --- IndexReader.java	15 Dec 2003 23:04:42 -0000	1.24
  +++ IndexReader.java	19 Feb 2004 18:28:59 -0000	1.25
  @@ -117,10 +117,10 @@
               if (infos.size() == 1) {		  // index is optimized
                 return new SegmentReader(infos, infos.info(0), true);
               } else {
  -                SegmentReader[] readers = new SegmentReader[infos.size()];
  +                IndexReader[] readers = new IndexReader[infos.size()];
                   for (int i = 0; i < infos.size(); i++)
                     readers[i] = new SegmentReader(infos, infos.info(i), i==infos.size()-1);
  -                return new SegmentsReader(infos, directory, readers);
  +                return new MultiReader(directory, readers);
               }
             }
           }.run();
  @@ -271,6 +271,14 @@
      * @see Field#setBoost(float)
      */
     public abstract byte[] norms(String field) throws IOException;
  +
  +  /** Reads the byte-encoded normalization factor for the named field of every
  +   *  document.  This is used by the search code to score documents.
  +   *
  +   * @see Field#setBoost(float)
  +   */
  +  public abstract void norms(String field, byte[] bytes, int offset)
  +    throws IOException;
   
     /** Expert: Resets the normalization factor for the named field of the named
      * document.  The norm represents the product of the field's {@link
  
  
  
  1.18      +2 -2      jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java
  
  Index: SegmentReader.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java,v
  retrieving revision 1.17
  retrieving revision 1.18
  diff -u -r1.17 -r1.18
  --- SegmentReader.java	15 Dec 2003 23:04:42 -0000	1.17
  +++ SegmentReader.java	19 Feb 2004 18:28:59 -0000	1.18
  @@ -357,7 +357,7 @@
     }
   
     /** Read norms into a pre-allocated array. */
  -  synchronized void norms(String field, byte[] bytes, int offset)
  +  public synchronized void norms(String field, byte[] bytes, int offset)
       throws IOException {
   
       Norm norm = (Norm)norms.get(field);
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/index/MultiReader.java
  
  Index: MultiReader.java
  ===================================================================
  package org.apache.lucene.index;
  
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001, 2002, 2003 The Apache Software Foundation.
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   *    "Apache Lucene" must not be used to endorse or promote products
   *    derived from this software without prior written permission. For
   *    written permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    "Apache Lucene", nor may "Apache" appear in their name, without
   *    prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  import java.io.IOException;
  import java.util.Collection;
  import java.util.HashSet;
  import java.util.Hashtable;
  import java.util.Iterator;
  import java.util.Set;
  
  import org.apache.lucene.document.Document;
  import org.apache.lucene.store.Directory;
  
  /** An IndexReader which reads multiple indexes, appending their content.
   *
   * @version $Id: MultiReader.java,v 1.1 2004/02/19 18:28:59 cutting Exp $
   */
  public class MultiReader extends IndexReader {
    private IndexReader[] readers;                  // the sub-readers, in order
    private int[] starts;                           // 1st docno for each segment
    private Hashtable normsCache = new Hashtable(); // field name -> byte[maxDoc()]
    private int maxDoc = 0;                         // sum of sub-reader maxDoc()s
    private int numDocs = -1;                       // cached doc count; -1 = invalid
    private boolean hasDeletions = false;

    /** Construct reading the named set of readers.
     *  Uses the directory of the first reader (null if the array is empty). */
    public MultiReader(IndexReader[] readers) throws IOException {
      this(readers.length == 0 ? null : readers[0].directory(), readers);
    }

    /** Construct reading the named set of readers. */
    public MultiReader(Directory directory, IndexReader[] readers)
      throws IOException {
      super(directory);
      this.readers = readers;
      starts = new int[readers.length + 1];         // build starts array
      for (int i = 0; i < readers.length; i++) {
        starts[i] = maxDoc;
        maxDoc += readers[i].maxDoc();              // compute maxDocs

        if (readers[i].hasDeletions())
          hasDeletions = true;
      }
      starts[readers.length] = maxDoc;              // sentinel: one past last doc
    }

    /** Returns the number of non-deleted documents, summed over the
     *  sub-readers.  The sum is cached until invalidated by a delete or
     *  undelete. */
    public synchronized int numDocs() {
      if (numDocs == -1) {                          // check cache
        int n = 0;                                  // cache miss--recompute
        for (int i = 0; i < readers.length; i++)
          n += readers[i].numDocs();                // sum from readers
        numDocs = n;
      }
      return numDocs;
    }

    public int maxDoc() {
      return maxDoc;
    }

    public Document document(int n) throws IOException {
      int i = readerIndex(n);                       // find segment num
      return readers[i].document(n - starts[i]);    // dispatch to segment reader
    }

    public boolean isDeleted(int n) {
      int i = readerIndex(n);                       // find segment num
      return readers[i].isDeleted(n - starts[i]);   // dispatch to segment reader
    }

    public boolean hasDeletions() { return hasDeletions; }

    protected synchronized void doDelete(int n) throws IOException {
      numDocs = -1;                                 // invalidate cache
      int i = readerIndex(n);                       // find segment num
      readers[i].doDelete(n - starts[i]);           // dispatch to segment reader
      hasDeletions = true;
    }

    public void undeleteAll() throws IOException {
      for (int i = 0; i < readers.length; i++)
        readers[i].undeleteAll();
      hasDeletions = false;
      numDocs = -1;           // FIX: restored docs change the count--invalidate cache
    }

    /** Returns the index of the sub-reader containing document <code>n</code>,
     *  by binary search of the starts array. */
    private int readerIndex(int n) {                // find reader for doc n:
      int lo = 0;                                   // search starts array
      int hi = readers.length - 1;                  // for first element less

      while (hi >= lo) {
        int mid = (lo + hi) >>> 1;                  // unsigned shift avoids int overflow
        int midValue = starts[mid];
        if (n < midValue)
          hi = mid - 1;
        else if (n > midValue)
          lo = mid + 1;
        else {                                      // found a match
          while (mid+1 < readers.length && starts[mid+1] == midValue) {
            mid++;                                  // scan to last match (empty segments)
          }
          return mid;
        }
      }
      return hi;
    }

    /** Returns the byte-encoded norms for the named field, concatenated
     *  across sub-readers and cached for subsequent calls. */
    public synchronized byte[] norms(String field) throws IOException {
      byte[] bytes = (byte[])normsCache.get(field);
      if (bytes != null)
        return bytes;                               // cache hit

      bytes = new byte[maxDoc()];
      for (int i = 0; i < readers.length; i++)
        readers[i].norms(field, bytes, starts[i]);
      normsCache.put(field, bytes);                 // update cache
      return bytes;
    }

    /** Reads norms into a pre-allocated array, writing maxDoc() bytes
     *  starting at result[offset]. */
    public synchronized void norms(String field, byte[] result, int offset)
      throws IOException {
      byte[] bytes = (byte[])normsCache.get(field);
      if (bytes != null) {                          // cache hit
        System.arraycopy(bytes, 0, result, offset, maxDoc());
        return;         // FIX: don't fall through and redundantly re-read segments
      }

      for (int i = 0; i < readers.length; i++)      // read from segments
        readers[i].norms(field, result, offset + starts[i]);
    }

    public synchronized void setNorm(int n, String field, byte value)
      throws IOException {
      normsCache.remove(field);                     // clear cache
      int i = readerIndex(n);                       // find segment num
      readers[i].setNorm(n-starts[i], field, value); // dispatch
    }

    public TermEnum terms() throws IOException {
      return new MultiTermEnum(readers, starts, null);
    }

    public TermEnum terms(Term term) throws IOException {
      return new MultiTermEnum(readers, starts, term);
    }

    /** Returns the document frequency of <code>t</code>, summed over all
     *  sub-readers. */
    public int docFreq(Term t) throws IOException {
      int total = 0;                                // sum freqs in segments
      for (int i = 0; i < readers.length; i++)
        total += readers[i].docFreq(t);
      return total;
    }

    public TermDocs termDocs() throws IOException {
      return new MultiTermDocs(readers, starts);
    }

    public TermPositions termPositions() throws IOException {
      return new MultiTermPositions(readers, starts);
    }

    protected synchronized void doClose() throws IOException {
      for (int i = 0; i < readers.length; i++)
        readers[i].close();
    }

    /**
     * @see IndexReader#getFieldNames()
     */
    public Collection getFieldNames() throws IOException {
      // maintain a unique set of field names
      Set fieldSet = new HashSet();
      for (int i = 0; i < readers.length; i++)
        fieldSet.addAll(readers[i].getFieldNames());
      return fieldSet;
    }

    /**
     * @see IndexReader#getFieldNames(boolean)
     */
    public Collection getFieldNames(boolean indexed) throws IOException {
      // maintain a unique set of field names
      Set fieldSet = new HashSet();
      for (int i = 0; i < readers.length; i++)
        fieldSet.addAll(readers[i].getFieldNames(indexed));
      return fieldSet;
    }
  }
  
  /** A TermEnum that merge-sorts the term enumerators of multiple readers
   *  into a single sequence, summing document frequencies of equal terms. */
  class MultiTermEnum extends TermEnum {
    // priority queue ordering per-segment enumerators by their current term
    private SegmentMergeQueue queue;

    private Term term;      // current merged term; null when exhausted
    private int docFreq;    // doc freq of current term, summed over segments

    /** Builds the merge queue over all readers; if <code>t</code> is non-null,
     *  each segment enumerator is seeked to <code>t</code> first. */
    public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
      throws IOException {
      queue = new SegmentMergeQueue(readers.length);
      for (int i = 0; i < readers.length; i++) {
        IndexReader reader = readers[i];
        SegmentTermEnum termEnum;

        if (t != null) {
          // seeked enumerator: presumably positioned at or after t -- no
          // initial next() needed (NOTE(review): relies on terms(Term) contract)
          termEnum = (SegmentTermEnum)reader.terms(t);
        } else
          termEnum = (SegmentTermEnum)reader.terms();

        SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
        // unseeked: advance to the first term; seeked: keep only if the seek
        // actually landed on a term (segment not exhausted)
        if (t == null ? smi.next() : termEnum.term() != null)
          queue.put(smi);                           // initialize queue
        else
          smi.close();                              // empty segment -- discard
      }

      // when seeked, position this enum on its first merged term so that
      // term()/docFreq() are immediately valid
      if (t != null && queue.size() > 0) {
        next();
      }
    }

    /** Advances to the next merged term.  Pops every segment positioned on the
     *  same (smallest) term, summing their doc freqs, and re-inserts each
     *  segment after advancing it. */
    public boolean next() throws IOException {
      SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
      if (top == null) {                            // all segments exhausted
        term = null;
        return false;
      }

      term = top.term;
      docFreq = 0;

      // consume all segments whose current term equals the merged term
      while (top != null && term.compareTo(top.term) == 0) {
        queue.pop();
        docFreq += top.termEnum.docFreq();          // increment freq
        if (top.next())
          queue.put(top);                           // restore queue
        else
          top.close();                              // done with a segment
        top = (SegmentMergeInfo)queue.top();
      }
      return true;
    }

    /** Returns the current term, or null if exhausted. */
    public Term term() {
      return term;
    }

    /** Returns the doc freq of the current term, summed over all segments. */
    public int docFreq() {
      return docFreq;
    }

    public void close() throws IOException {
      queue.close();
    }
  }
  
  /** A TermDocs over multiple readers: walks each reader's enumerator in
   *  turn, shifting returned doc numbers by that reader's start offset. */
  class MultiTermDocs implements TermDocs {
    protected IndexReader[] readers;
    protected int[] starts;
    protected Term term;                            // term of the latest seek

    protected int base = 0;                         // doc-number shift for current segment
    protected int pointer = 0;                      // next segment to open

    private SegmentTermDocs[] segTermDocs;          // lazily-opened, reused per segment
    protected SegmentTermDocs current;              // == segTermDocs[pointer]

    public MultiTermDocs(IndexReader[] r, int[] s) {
      readers = r;
      starts = s;

      segTermDocs = new SegmentTermDocs[r.length];
    }

    /** Current doc number, shifted into the composite numbering. */
    public int doc() {
      return base + current.doc;
    }

    public int freq() {
      return current.freq;
    }

    /** Repositions on a new term; segments are re-opened lazily by next(). */
    public void seek(Term term) {
      this.current = null;
      this.pointer = 0;
      this.base = 0;
      this.term = term;
    }

    public void seek(TermEnum termEnum) throws IOException {
      seek(termEnum.term());
    }

    public boolean next() throws IOException {
      if (current != null && current.next())
        return true;                                // more docs in this segment
      if (pointer >= readers.length)
        return false;                               // no segments left
      base = starts[pointer];                       // move to next segment
      current = termDocs(pointer++);
      return next();                                // retry (current may be null)
    }

    /** Optimized implementation: bulk-reads from the current segment and
     *  shifts doc numbers by the segment base. */
    public int read(final int[] docs, final int[] freqs)
        throws IOException {
      while (true) {
        if (current == null) {
          if (pointer >= readers.length)            // no more segments
            return 0;
          base = starts[pointer];                   // open next segment
          current = termDocs(pointer++);
          continue;                                 // may still be null
        }
        int got = current.read(docs, freqs);
        if (got == 0) {                             // none left in segment
          current = null;
        } else {                                    // got some
          final int shift = base;                   // adjust doc numbers
          for (int k = 0; k < got; k++)
            docs[k] += shift;
          return got;
        }
      }
    }

    /** As yet unoptimized implementation: advances one doc at a time. */
    public boolean skipTo(int target) throws IOException {
      while (true) {
        if (!next())
          return false;
        if (doc() >= target)
          return true;
      }
    }

    /** Returns the enumerator for segment <code>i</code>, opening it on first
     *  use, seeked to the current term; null when no term has been set. */
    private SegmentTermDocs termDocs(int i) throws IOException {
      if (term == null)
        return null;
      SegmentTermDocs docsEnum = segTermDocs[i];
      if (docsEnum == null)
        docsEnum = segTermDocs[i] = termDocs(readers[i]);
      docsEnum.seek(term);
      return docsEnum;
    }

    /** Obtains a fresh enumerator from one reader; overridden by subclasses. */
    protected SegmentTermDocs termDocs(IndexReader reader)
      throws IOException {
      return (SegmentTermDocs)reader.termDocs();
    }

    public void close() throws IOException {
      for (int k = 0; k < segTermDocs.length; k++) {
        SegmentTermDocs docsEnum = segTermDocs[k];
        if (docsEnum != null)
          docsEnum.close();
      }
    }
  }
  
  /** A TermPositions over multiple readers; extends MultiTermDocs, swapping
   *  in positions-capable per-segment enumerators. */
  class MultiTermPositions extends MultiTermDocs implements TermPositions {

    public MultiTermPositions(IndexReader[] r, int[] s) {
      super(r, s);
    }

    /** Obtains a positions-capable enumerator from one reader. */
    protected SegmentTermDocs termDocs(IndexReader reader)
      throws IOException {
      return (SegmentTermDocs)reader.termPositions();
    }

    /** Delegates to the current segment's positions enumerator. */
    public int nextPosition() throws IOException {
      return ((SegmentTermPositions)current).nextPosition();
    }
  }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org