You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by gs...@apache.org on 2007/03/23 04:48:13 UTC
svn commit: r521569 - in /lucene/java/trunk/contrib/benchmark: ./
src/java/org/apache/lucene/benchmark/byTask/feeds/
src/java/org/apache/lucene/benchmark/byTask/tasks/
Author: gsingers
Date: Thu Mar 22 20:48:12 2007
New Revision: 521569
URL: http://svn.apache.org/viewvc?view=rev&rev=521569
Log:
LUCENE-837:
Added optional bytes field to store on the Document. Enabled ReutersDocMaker w/ the ability to store byte data in a field. If the param is set (see the javadocs) it will store the contents of the body as a UTF-8 byte array.
Then, the SearchTravRetLoadFieldSelectorTask (whew) can take in parameters specifying what fields to load (others are ignored by default)
Added:
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java (with props)
Modified:
lucene/java/trunk/contrib/benchmark/CHANGES.txt
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java
Modified: lucene/java/trunk/contrib/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/CHANGES.txt?view=diff&rev=521569&r1=521568&r2=521569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/benchmark/CHANGES.txt Thu Mar 22 20:48:12 2007
@@ -4,6 +4,12 @@
$Id:$
+3/22/07
+
+-Moved withRetrieve() call out of the loop in ReadTask
+-Added SearchTravRetLoadFieldSelectorTask to help benchmark some of the FieldSelector capabilities
+-Added options to store content bytes on the Reuters Doc (and others, but Reuters is the only one w/ it enabled)
+
3/21/07
Tests (for benchmarking code correctness) were added - LUCENE-840.
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java?view=diff&rev=521569&r1=521568&r2=521569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java Thu Mar 22 20:48:12 2007
@@ -17,18 +17,18 @@
* limitations under the License.
*/
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.Format;
+import org.apache.lucene.document.DateTools;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
import java.io.File;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.Properties;
-import org.apache.lucene.document.DateTools;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.benchmark.byTask.utils.Format;
-
/**
* Create documents for the test.
@@ -44,6 +44,7 @@
Date date;
String title;
String body;
+ byte [] bytes;
Properties props;
}
@@ -122,6 +123,10 @@
docData.body = docData.body.substring(size); // some left
}
doc.add(new Field(BODY_FIELD, bdy, storeVal, indexVal, termVecVal));
+ }
+ if (docData.bytes != null && docData.bytes.length != 0)
+ {
+ doc.add(new Field("bytes", docData.bytes, Field.Store.YES));
}
if (docData.props!=null) {
for (Iterator it = docData.props.keySet().iterator(); it.hasNext(); ) {
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java?view=diff&rev=521569&r1=521568&r2=521569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java Thu Mar 22 20:48:12 2007
@@ -17,6 +17,8 @@
* limitations under the License.
*/
+import org.apache.lucene.benchmark.byTask.utils.Config;
+
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
@@ -25,11 +27,14 @@
import java.util.ArrayList;
import java.util.Locale;
-import org.apache.lucene.benchmark.byTask.utils.Config;
-
/**
* A DocMaker using the Reuters collection for its input.
+ *
+ * Config properties:
+ * docs.dir=<path to the docs dir> Default: reuters-out
+ * reuters.doc.maker.store.bytes=true|false Default: false
+ *
*/
public class ReutersDocMaker extends BasicDocMaker {
@@ -38,7 +43,7 @@
private ArrayList inputFiles = new ArrayList();
private int nextFile = 0;
private int iteration=0;
-
+ private boolean storeBytes = false;
/* (non-Javadoc)
* @see SimpleDocMaker#setConfig(java.util.Properties)
*/
@@ -46,6 +51,8 @@
super.setConfig(config);
String d = config.get("docs.dir","reuters-out");
dataDir = new File(new File("work"),d);
+ storeBytes = config.get("reuters.doc.maker.store.bytes", false);
+
collectFiles(dataDir,inputFiles);
if (inputFiles.size()==0) {
throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
@@ -89,6 +96,10 @@
dd.name = name;
dd.title = title;
dd.body = bodyBuf.toString();
+ if (storeBytes == true)
+ {
+ dd.bytes = dd.body.getBytes("UTF-8");
+ }
return dd;
}
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java?view=diff&rev=521569&r1=521568&r2=521569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java Thu Mar 22 20:48:12 2007
@@ -26,6 +26,7 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
+import java.io.IOException;
/**
@@ -76,17 +77,15 @@
Hits hits = searcher.search(q);
//System.out.println("searched: "+q);
- if (withTraverse() && hits!=null) {
- Document doc = null;
- int traversalSize = Math.min(hits.length(), traversalSize());
- if (traversalSize > 0) {
+ if (withTraverse() && hits!=null) {
+ int traversalSize = Math.min(hits.length(), traversalSize());
+ if (traversalSize > 0) {
+ boolean retrieve = withRetrieve();
for (int m = 0; m < hits.length(); m++) {
int id = hits.id(m);
res++;
-
- if (withRetrieve()) {
- doc = ir.document(id);
- res += (doc==null ? 0 : 1);
+ if (retrieve) {
+ res += retrieveDoc(ir, id);
}
}
}
@@ -101,6 +100,10 @@
return res;
}
+ protected int retrieveDoc(IndexReader ir, int id) throws IOException { // per-hit retrieval step; protected so a subclass can change how the document is loaded
+ return (ir.document(id) == null ? 0 : 1); // 1 if a Document was returned, 0 if it was null; the caller adds this to its result count
+ }
+
/**
* Return query maker used for this task.
*/
@@ -122,18 +125,18 @@
public abstract boolean withTraverse ();
/**
- * Specify the number of hits to traverse. Tasks should override this if they want to restrict the number
- * of hits that are traversed when {@link #withTraverse()} is true. Must be greater than 0.
- *
- * Read task calculates the traversal as: Math.min(hits.length(), traversalSize())
- * @return Integer.MAX_VALUE
- */
- public int traversalSize()
- {
- return Integer.MAX_VALUE;
- }
-
- /**
+ * Specify the number of hits to traverse. Tasks should override this if they want to restrict the number
+ * of hits that are traversed when {@link #withTraverse()} is true. Must be greater than 0.
+ *
+ * Read task calculates the traversal as: Math.min(hits.length(), traversalSize())
+ * @return Integer.MAX_VALUE
+ */
+ public int traversalSize()
+ {
+ return Integer.MAX_VALUE;
+ }
+
+ /**
* Return true if, with search & results traversing, docs should be retrieved.
*/
public abstract boolean withRetrieve ();
Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java?view=auto&rev=521569
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java Thu Mar 22 20:48:12 2007
@@ -0,0 +1,55 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+/**
+ * Created by IntelliJ IDEA.
+ * User: Grant Ingersoll
+ * Date: Mar 22, 2007
+ * Time: 10:04:49 PM
+ * $Id:$
+ * Copyright 2007. Center For Natural Language Processing
+ */
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.document.SetBasedFieldSelector;
+import org.apache.lucene.index.IndexReader;
+
+import java.util.StringTokenizer;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.Collections;
+import java.io.IOException;
+
+/**
+ * Search and Traverse and Retrieve docs task using a SetBasedFieldSelector.
+ *
+ * <p>Note: This task reuses the reader if it is already open.
+ * Otherwise a reader is opened at start and closed at the end.
+ *
+ * Takes optional param: comma separated list of Fields to load.
+ */
+public class SearchTravRetLoadFieldSelectorTask extends SearchTravTask {
+ // Selector handed to IndexReader.document() in retrieveDoc(); only assigned in setParams(), so it is null until that is called.
+ protected FieldSelector fieldSelector;
+ public SearchTravRetLoadFieldSelectorTask(PerfRunData runData) {
+ super(runData);
+ // Nothing else is initialized here: fieldSelector is left unset.
+ }
+ // Always reports that docs should be retrieved while results are traversed.
+ public boolean withRetrieve() {
+ return true;
+ }
+ // Loads document `id` by passing fieldSelector to IndexReader.document(id, selector).
+ // Returns 1 when a Document came back and 0 when it was null.
+ protected int retrieveDoc(IndexReader ir, int id) throws IOException {
+ return (ir.document(id, fieldSelector) == null ? 0 : 1);
+ }
+ // Builds fieldSelector from a comma-separated list of field names; params must be non-null (StringTokenizer throws NullPointerException otherwise).
+ public void setParams(String params) {
+ Set fieldsToLoad = new HashSet();
+ for (StringTokenizer tokenizer = new StringTokenizer(params, ","); tokenizer.hasMoreTokens();) { // only non-empty tokens are produced, so consecutive commas add nothing
+ String s = tokenizer.nextToken(); // used as-is: surrounding whitespace is not trimmed
+ fieldsToLoad.add(s);
+ }
+ fieldSelector = new SetBasedFieldSelector(fieldsToLoad, Collections.EMPTY_SET); // NOTE(review): second argument is presumably the set of lazily loaded fields (none here) — confirm against SetBasedFieldSelector
+ }
+}
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java
------------------------------------------------------------------------------
svn:eol-style = native