You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2008/12/16 16:09:48 UTC

svn commit: r727063 - in /lucene/java/trunk/contrib/benchmark: CHANGES.txt src/java/org/apache/lucene/benchmark/byTask/package.html src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java

Author: mikemccand
Date: Tue Dec 16 07:09:46 2008
New Revision: 727063

URL: http://svn.apache.org/viewvc?rev=727063&view=rev
Log:
LUCENE-1493: allow setting top number of hits to collect with search.num.hits

Modified:
    lucene/java/trunk/contrib/benchmark/CHANGES.txt
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java

Modified: lucene/java/trunk/contrib/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/CHANGES.txt?rev=727063&r1=727062&r2=727063&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/benchmark/CHANGES.txt Tue Dec 16 07:09:46 2008
@@ -3,6 +3,9 @@
 The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
 
 $Id:$
+12/16/08
+  LUCENE-1493: Stop using deprecated Hits API for searching; add new
+  param search.num.hits to set top N docs to collect.
 
 12/16/08
   LUCENE-1492: Added optional readOnly param (default true) to OpenReader task.

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html?rev=727063&r1=727062&r2=727063&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html Tue Dec 16 07:09:46 2008
@@ -539,6 +539,7 @@
     </li><li>query.maker
     </li><li>file.query.maker.file
     </li><li>file.query.maker.default.field
+    </li><li>search.num.hits
     </li></ul>
   </li>
 
@@ -689,4 +690,4 @@
 </DIV>
 <DIV>&nbsp;</DIV>
 </BODY>
-</HTML>
\ No newline at end of file
+</HTML>

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java?rev=727063&r1=727062&r2=727063&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java Tue Dec 16 07:09:46 2008
@@ -31,7 +31,8 @@
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Sort;
@@ -50,7 +51,9 @@
  * <p/>
  * <p>Note: All ReadTasks reuse the reader if it is already open.
  * Otherwise a reader is opened at start and closed at the end.
- * <p/>
+ * <p>
+ * The <code>search.num.hits</code> config parameter sets
+ * the top number of hits to collect during searching.
  * <p>Other side effects: none.
  */
 public abstract class ReadTask extends PerfTask {
@@ -89,40 +92,45 @@
       QueryMaker queryMaker = getQueryMaker();
       Query q = queryMaker.makeQuery();
       Sort sort = getSort();
-      Hits hits;
-      if(sort != null) {
-        hits = searcher.search(q, sort);
-      } else {
-        hits = searcher.search(q);
-      }
-      //System.out.println("searched: "+q);
+      TopDocs hits;
+      final int numHits = numHits();
+      if (numHits > 0) {
+        if (sort != null) {
+          hits = searcher.search(q, null, numHits, sort);
+        } else {
+          hits = searcher.search(q, numHits);
+        }
+        //System.out.println("q=" + q + ":" + hits.totalHits + " total hits"); 
 
-      if (withTraverse() && hits != null) {
-        int traversalSize = Math.min(hits.length(), traversalSize());
-        if (traversalSize > 0) {
-          boolean retrieve = withRetrieve();
-          int numHighlight = Math.min(numToHighlight(), hits.length());
-          Analyzer analyzer = getRunData().getAnalyzer();
-          Highlighter highlighter = null;
-          int maxFrags = 1;
-          if (numHighlight > 0) {
-            highlighter = getHighlighter(q);
-            maxFrags = maxNumFragments();
-          }
-          boolean merge = isMergeContiguousFragments();
-          for (int m = 0; m < traversalSize; m++) {
-            int id = hits.id(m);
-            res++;
-            if (retrieve) {
-              Document document = retrieveDoc(ir, id);
-              res += document != null ? 1 : 0;
-              if (numHighlight > 0 && m < numHighlight) {
-                Collection/*<String>*/ fieldsToHighlight = getFieldsToHighlight(document);
-                for (Iterator iterator = fieldsToHighlight.iterator(); iterator.hasNext();) {
-                  String field = (String) iterator.next();
-                  String text = document.get(field);
-                  TokenStream ts = TokenSources.getAnyTokenStream(ir, id, field, document, analyzer);
-                  res += doHighlight(ts, text, highlighter, merge, maxFrags);
+        if (withTraverse()) {
+          final ScoreDoc[] scoreDocs = hits.scoreDocs;
+          int traversalSize = Math.min(scoreDocs.length, traversalSize());
+
+          if (traversalSize > 0) {
+            boolean retrieve = withRetrieve();
+            int numHighlight = Math.min(numToHighlight(), scoreDocs.length);
+            Analyzer analyzer = getRunData().getAnalyzer();
+            Highlighter highlighter = null;
+            int maxFrags = 1;
+            if (numHighlight > 0) {
+              highlighter = getHighlighter(q);
+              maxFrags = maxNumFragments();
+            }
+            boolean merge = isMergeContiguousFragments();
+            for (int m = 0; m < traversalSize; m++) {
+              int id = scoreDocs[m].doc;
+              res++;
+              if (retrieve) {
+                Document document = retrieveDoc(ir, id);
+                res += document != null ? 1 : 0;
+                if (numHighlight > 0 && m < numHighlight) {
+                  Collection/*<String>*/ fieldsToHighlight = getFieldsToHighlight(document);
+                  for (Iterator iterator = fieldsToHighlight.iterator(); iterator.hasNext();) {
+                    String field = (String) iterator.next();
+                    String text = document.get(field);
+                    TokenStream ts = TokenSources.getAnyTokenStream(ir, id, field, document, analyzer);
+                    res += doHighlight(ts, text, highlighter, merge, maxFrags);
+                  }
                 }
               }
             }
@@ -178,6 +186,24 @@
     return Integer.MAX_VALUE;
   }
 
+  static final int DEFAULT_SEARCH_NUM_HITS = 10;
+  private int numHits;
+
+  public void setup() throws Exception {
+    super.setup();
+    numHits = getRunData().getConfig().get("search.num.hits", DEFAULT_SEARCH_NUM_HITS);
+  }
+
+  /**
+   * Returns the number of top hits to collect per search.  Tasks should override this if they want to restrict
+   * the number of hits that are collected during searching. Must be greater than 0.
+   *
+   * @return 10 by default, or search.num.hits config if set.
+   */
+  public int numHits() {
+    return numHits;
+  }
+
   /**
    * Return true if, with search & results traversing, docs should be retrieved.
    */