Posted to general@lucene.apache.org by "yannianmu (母延年)" <ya...@tencent.com> on 2015/05/13 04:05:58 UTC

Re: Controlling size of matched results in Lucene(Internet mail)

First, why not use the search methods on IndexSearcher that return the top documents as a TopDocs? For example:
TopDocs search(Query query, Filter filter, int n)
public final TopFieldDocs search(Query query, Filter filter, int n, Sort sort) throws IOException
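
A minimal sketch of that first option (here searcher and query are placeholders for your own IndexSearcher and Query, and 2000 is just the number of hits you ask for):

// Ask for at most 2000 hits; totalHits still reports how many documents matched in total.
TopDocs top = searcher.search(query, null, 2000);   // null filter
System.out.println("matched " + top.totalHits + " documents");
for (ScoreDoc sd : top.scoreDocs) {
  System.out.println("doc=" + sd.doc + " score=" + sd.score);
}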
Second, if that does not satisfy your needs, you can write a custom Collector like the one below. (This is essentially Lucene's own org.apache.lucene.facet.FacetsCollector, which records every matching document per segment and, optionally, its score.)



// Adapted from Lucene's org.apache.lucene.facet.FacetsCollector.
// Note: this appears to be the Lucene 5.x version (SimpleCollector, LeafReaderContext, BitDocIdSet);
// Lucene 4.10's FacetsCollector is written against AtomicReaderContext and the older Collector API.
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.FixedBitSet;

public class FacetsCollector extends SimpleCollector implements Collector {

  private LeafReaderContext context;
  private Scorer scorer;
  private int totalHits;
  private float[] scores;
  private final boolean keepScores;
  private final List<MatchingDocs> matchingDocs = new ArrayList<>();
  private Docs docs;
  /**
   * Used during collection to record matching docs and then return a
   * {@link DocIdSet} that contains them.
   */
  protected static abstract class Docs {

    /** Sole constructor. */
    public Docs() {}

    /** Record the given document. */
    public abstract void addDoc(int docId) throws IOException;

    /** Return the {@link DocIdSet} which contains all the recorded docs. */
    public abstract DocIdSet getDocIdSet();
  }

  /**
   * Holds the documents that were matched in the {@link org.apache.lucene.index.LeafReaderContext}.
   * If scores were required, then {@code scores} is not null.
   */
  public final static class MatchingDocs {

    /** Context for this segment. */
    public final LeafReaderContext context;

    /** Which documents were seen. */
    public final DocIdSet bits;

    /** Non-sparse scores array. */
    public final float[] scores;

    /** Total number of hits */
    public final int totalHits;

    /** Sole constructor. */
    public MatchingDocs(LeafReaderContext context, DocIdSet bits, int totalHits, float[] scores) {
      this.context = context;
      this.bits = bits;
      this.scores = scores;
      this.totalHits = totalHits;
    }
  }

  /** Default constructor */
  public FacetsCollector() {
    this(false);
  }

  /** Create this; if {@code keepScores} is true then a
   *  float[] is allocated to hold score of all hits. */
  public FacetsCollector(boolean keepScores) {
    this.keepScores = keepScores;
  }
  /**
   * Creates a {@link Docs} to record hits. The default uses {@link FixedBitSet}
   * to record hits and you can override to e.g. record the docs in your own
   * {@link DocIdSet}.
   */
  protected Docs createDocs(final int maxDoc) {
    return new Docs() {
      private final FixedBitSet bits = new FixedBitSet(maxDoc);

      @Override
      public void addDoc(int docId) throws IOException {
        bits.set(docId);
      }

      @Override
      public DocIdSet getDocIdSet() {
        return new BitDocIdSet(bits);
      }
    };
  }

  /** True if scores were saved. */
  public final boolean getKeepScores() {
    return keepScores;
  }
  /**
   * Returns the documents matched by the query, one {@link MatchingDocs} per
   * visited segment.
   */
  public List<MatchingDocs> getMatchingDocs() {
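    // the docs (and scores) recorded for the most recently visited segment are still pending; flush them before returning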
    if (docs != null) {
      matchingDocs.add(new MatchingDocs(this.context, docs.getDocIdSet(), totalHits, scores));
      docs = null;
      scores = null;
      context = null;
    }

    return matchingDocs;
  }

  @Override
  public final void collect(int doc) throws IOException {
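    // record the segment-local doc id; if scores are kept, grow the scores array as needed and remember this hit's score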
    docs.addDoc(doc);
    if (keepScores) {
      if (totalHits >= scores.length) {
        float[] newScores = new float[ArrayUtil.oversize(totalHits + 1, 4)];
        System.arraycopy(scores, 0, newScores, 0, totalHits);
        scores = newScores;
      }
      scores[totalHits] = scorer.score();
    }
    totalHits++;
  }

  @Override
  public boolean needsScores() {
    return true;
  }

  @Override
  public final void setScorer(Scorer scorer) throws IOException {
    this.scorer = scorer;
  }

  @Override
  protected void doSetNextReader(LeafReaderContext context) throws IOException {
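    // a new segment is starting: flush what was collected for the previous one, then reset the per-segment state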
    if (docs != null) {
      matchingDocs.add(new MatchingDocs(this.context, docs.getDocIdSet(), totalHits, scores));
    }
    docs = createDocs(context.reader().maxDoc());
    totalHits = 0;
    if (keepScores) {
      scores = new float[64]; // some initial size
    }
    this.context = context;
  }

  /** Utility method, to search and also collect all hits
   *  into the provided {@link Collector}. */
  public static TopDocs search(IndexSearcher searcher, Query q, int n, Collector fc) throws IOException {
    return doSearch(searcher, null, q, null, n, null, false, false, fc);
  }

  /** Utility method, to search and also collect all hits
   *  into the provided {@link Collector}. */
  public static TopDocs search(IndexSearcher searcher, Query q, Filter filter, int n, Collector fc) throws IOException {
    return doSearch(searcher, null, q, filter, n, null, false, false, fc);
  }

  /** Utility method, to search and also collect all hits
   *  into the provided {@link Collector}. */
  public static TopFieldDocs search(IndexSearcher searcher, Query q, Filter filter, int n, Sort sort, Collector fc) throws IOException {
    if (sort == null) {
      throw new IllegalArgumentException("sort must not be null");
    }
    return (TopFieldDocs) doSearch(searcher, null, q, filter, n, sort, false, false, fc);
  }

  /** Utility method, to search and also collect all hits
   *  into the provided {@link Collector}. */
  public static TopFieldDocs search(IndexSearcher searcher, Query q, Filter filter, int n, Sort sort, boolean doDocScores, boolean doMaxScore, Collector fc) throws IOException {
    if (sort == null) {
      throw new IllegalArgumentException("sort must not be null");
    }
    return (TopFieldDocs) doSearch(searcher, null, q, filter, n, sort, doDocScores, doMaxScore, fc);
  }

  /** Utility method, to search and also collect all hits
   *  into the provided {@link Collector}. */
  public static TopDocs searchAfter(IndexSearcher searcher, ScoreDoc after, Query q, int n, Collector fc) throws IOException {
    return doSearch(searcher, after, q, null, n, null, false, false, fc);
  }

  /** Utility method, to search and also collect all hits
   *  into the provided {@link Collector}. */
  public static TopDocs searchAfter(IndexSearcher searcher, ScoreDoc after, Query q, Filter filter, int n, Collector fc) throws IOException {
    return doSearch(searcher, after, q, filter, n, null, false, false, fc);
  }

  /** Utility method, to search and also collect all hits
   *  into the provided {@link Collector}. */
  public static TopDocs searchAfter(IndexSearcher searcher, ScoreDoc after, Query q, Filter filter, int n, Sort sort, Collector fc) throws IOException {
    if (sort == null) {
      throw new IllegalArgumentException("sort must not be null");
    }
    return doSearch(searcher, after, q, filter, n, sort, false, false, fc);
  }

  /** Utility method, to search and also collect all hits
   *  into the provided {@link Collector}. */
  public static TopDocs searchAfter(IndexSearcher searcher, ScoreDoc after, Query q, Filter filter, int n, Sort sort, boolean doDocScores, boolean doMaxScore, Collector fc) throws IOException {
    if (sort == null) {
      throw new IllegalArgumentException("sort must not be null");
    }
    return doSearch(searcher, after, q, filter, n, sort, doDocScores, doMaxScore, fc);
  }

  private static TopDocs doSearch(IndexSearcher searcher, ScoreDoc after, Query q, Filter filter, int n, Sort sort,
                                  boolean doDocScores, boolean doMaxScore, Collector fc) throws IOException {
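    // Fold the filter into the query, clamp n to the index size, then run the top-docs collector
    // and the user-supplied collector together in a single pass over the index.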

    if (filter != null) {
      q = new FilteredQuery(q, filter);
    }

    int limit = searcher.getIndexReader().maxDoc();
    if (limit == 0) {
      limit = 1;
    }
    n = Math.min(n, limit);

    if (after != null && after.doc >= limit) {
      throw new IllegalArgumentException("after.doc exceeds the number of documents in the reader: after.doc="
                                         + after.doc + " limit=" + limit);
    }

    TopDocsCollector<?> hitsCollector;
    if (sort != null) {
      if (after != null && !(after instanceof FieldDoc)) {
        // TODO: if we fix type safety of TopFieldDocs we can
        // remove this
        throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
      }
      boolean fillFields = true;
      hitsCollector = TopFieldCollector.create(sort, n,
                                               (FieldDoc) after,
                                               fillFields,
                                               doDocScores,
                                               doMaxScore);
    } else {
      hitsCollector = TopScoreDocCollector.create(n, after);
    }
    searcher.search(q, MultiCollector.wrap(hitsCollector, fc));
    return hitsCollector.topDocs();
  }
}
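
For the original question (getting every matching document and its score out of the whole index), a rough usage sketch could look like this; searcher and query are again placeholders, DocIdSetIterator comes from org.apache.lucene.search, and the snippet assumes it runs somewhere that can throw IOException. Any document id that never appears in the loop simply did not match the query:

FacetsCollector fc = new FacetsCollector(true);     // true: also keep per-hit scores
FacetsCollector.search(searcher, query, 10, fc);    // the TopDocs side is incidental; fc sees every hit

for (FacetsCollector.MatchingDocs md : fc.getMatchingDocs()) {
  DocIdSetIterator it = md.bits.iterator();
  if (it == null) {
    continue;                                       // no hits in this segment
  }
  int hit = 0;
  for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
    int globalDoc = md.context.docBase + doc;       // segment-local id -> index-wide id
    float score = md.scores[hit++];                 // scores line up with the bit-set iteration order
    System.out.println("doc=" + globalDoc + " score=" + score + " matched=true");
  }
}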
________________________________
yannianmu(母延年)

From: Susrutha Gongalla<ma...@gmail.com>
Date: 2015-05-12 18:27
To: general@lucene.apache.org<ma...@lucene.apache.org>
Subject: Controlling size of matched results in Lucene(Internet mail)

Hello,

I am developing a matching algorithm using Lucene 4.10.0.
My index consists of about 2000 documents.
When I use the 'search' method on a query term, I get about n results that
point to n documents in the index, along with their corresponding scores.
What I would like to get is all 2000 documents with their Lucene scores,
along with whether each one is matched or unmatched.
I would like to control the size of the results that Lucene returns when I
search for a query term.

I have tried altering the default similarity used in Lucene by overriding
the score methods.
However, this did not affect the size of the results generated by Lucene.

I also tried explicitly giving a 'null' value for the filter when calling the
'search' method.
This also did not affect the size of the results.

I just started working with Lucene.
Would appreciate any help in this regard!

Best,
Susrutha Gongalla