You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by gs...@apache.org on 2007/03/23 14:13:47 UTC
svn commit: r521717 - in /lucene/java/trunk/contrib/benchmark/src:
java/org/apache/lucene/benchmark/byTask/feeds/
java/org/apache/lucene/benchmark/byTask/tasks/
test/org/apache/lucene/benchmark/byTask/
Author: gsingers
Date: Fri Mar 23 06:13:46 2007
New Revision: 521717
URL: http://svn.apache.org/viewvc?view=rev&rev=521717
Log:
LUCENE-837:
Fixed issues related to Doron's comments on Mar. 22, 07.
Put in tests for ***FieldSelectorTask. Moved the byte field handling from ReutersDocMaker to BasicDocMaker.
Added in tests for new Task
Modified:
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java
lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java?view=diff&rev=521717&r1=521716&r2=521717
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java Fri Mar 23 06:13:46 2007
@@ -24,6 +24,7 @@
import org.apache.lucene.document.Field;
import java.io.File;
+import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
@@ -34,17 +35,24 @@
* Create documents for the test.
* Maintains counters of chars etc. so that sub-classes just need to
* provide textual content, and the create-by-size is handled here.
+ *
+ * <p/>
+ * Config Params (default is in caps):
+ * doc.stored=true|FALSE<br/>
+ * doc.tokenized=TRUE|false<br/>
+ * doc.term.vector=true|FALSE<br/>
+ * doc.store.body.bytes=true|FALSE //Store the body contents as raw UTF-8 bytes in a field<br/>
*/
public abstract class BasicDocMaker implements DocMaker {
private int numDocsCreated = 0;
-
+ private boolean storeBytes = false;
+
static class DocData {
String name;
Date date;
String title;
String body;
- byte [] bytes;
Properties props;
}
@@ -91,7 +99,7 @@
// create a doc
// use only part of the body, modify it to keep the rest (or use all if size==0).
// reset the docdata properties so they are not added more than once.
- private Document createDocument(DocData docData, int size, int cnt) {
+ private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException {
int docid = incrNumDocsCreated();
Document doc = new Document();
doc.add(new Field("docid", "doc"+docid, storeVal, indexVal, termVecVal));
@@ -123,11 +131,11 @@
docData.body = docData.body.substring(size); // some left
}
doc.add(new Field(BODY_FIELD, bdy, storeVal, indexVal, termVecVal));
+ if (storeBytes == true) {
+ doc.add(new Field("bytes", bdy.getBytes("UTF-8"), Field.Store.YES));
+ }
}
- if (docData.bytes != null && docData.bytes.length != 0)
- {
- doc.add(new Field("bytes", docData.bytes, Field.Store.YES));
- }
+
if (docData.props!=null) {
for (Iterator it = docData.props.keySet().iterator(); it.hasNext(); ) {
String key = (String) it.next();
@@ -186,6 +194,7 @@
storeVal = (stored ? Field.Store.YES : Field.Store.NO);
indexVal = (tokenized ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED);
termVecVal = (termVec ? Field.TermVector.YES : Field.TermVector.NO);
+ storeBytes = config.get("doc.store.body.bytes", false);
}
/*
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java?view=diff&rev=521717&r1=521716&r2=521717
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java Fri Mar 23 06:13:46 2007
@@ -33,7 +33,7 @@
*
* Config properties:
* docs.dir=<path to the docs dir| Default: reuters-out>
- * reuters.doc.maker.store.bytes=true|false Default: false
+
*
*/
public class ReutersDocMaker extends BasicDocMaker {
@@ -43,7 +43,7 @@
private ArrayList inputFiles = new ArrayList();
private int nextFile = 0;
private int iteration=0;
- private boolean storeBytes = false;
+
/* (non-Javadoc)
* @see SimpleDocMaker#setConfig(java.util.Properties)
*/
@@ -51,7 +51,7 @@
super.setConfig(config);
String d = config.get("docs.dir","reuters-out");
dataDir = new File(new File("work"),d);
- storeBytes = config.get("reuters.doc.maker.store.bytes", false);
+
collectFiles(dataDir,inputFiles);
if (inputFiles.size()==0) {
@@ -96,10 +96,6 @@
dd.name = name;
dd.title = title;
dd.body = bodyBuf.toString();
- if (storeBytes == true)
- {
- dd.bytes = dd.body.getBytes("UTF-8");
- }
return dd;
}
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java?view=diff&rev=521717&r1=521716&r2=521717
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java Fri Mar 23 06:13:46 2007
@@ -30,8 +30,8 @@
* <br>
* Tasks performing some work that should be measured for the task, can overide setup() and/or tearDown() and
* placed that work there.
- * <br>
- * Relevant properties: <code>task.max.depth.log</code>.
+ * <br>
+ * Relevant properties: <code>task.max.depth.log</code>.
*/
public abstract class PerfTask implements Cloneable {
@@ -41,7 +41,7 @@
private String name;
private int depth = 0;
private int maxDepthLogStart = 0;
- private String params = null;
+ protected String params = null;
protected static final String NEW_LINE = System.getProperty("line.separator");
@@ -202,24 +202,24 @@
}
/**
- * Sub classes that supports parameters must overide this method to return true.
- * @return true iff this task supports command line params.
- */
- public boolean supportsParams () {
- return false;
- }
-
- /**
+ * Subclasses that support parameters must override this method to return true.
+ * @return true iff this task supports command line params.
+ */
+ public boolean supportsParams () {
+ return false;
+ }
+
+ /**
* Set the params of this task.
- * @exception UnsupportedOperationException for tasks supporting command line parameters.
+ * @exception UnsupportedOperationException for tasks not supporting command line parameters.
*/
public void setParams(String params) {
- if (!supportsParams()) {
- throw new UnsupportedOperationException(getName()+" does not support command line parameters.");
- }
+ if (!supportsParams()) {
+ throw new UnsupportedOperationException(getName()+" does not support command line parameters.");
+ }
this.params = params;
}
-
+
/**
* @return Returns the Params.
*/
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java?view=diff&rev=521717&r1=521716&r2=521717
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java Fri Mar 23 06:13:46 2007
@@ -53,11 +53,20 @@
}
public void setParams(String params) {
+ this.params = params;
Set fieldsToLoad = new HashSet();
for (StringTokenizer tokenizer = new StringTokenizer(params, ","); tokenizer.hasMoreTokens();) {
String s = tokenizer.nextToken();
fieldsToLoad.add(s);
}
fieldSelector = new SetBasedFieldSelector(fieldsToLoad, Collections.EMPTY_SET);
+ }
+
+
+ /* (non-Javadoc)
+ * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams()
+ */
+ public boolean supportsParams() {
+ return true;
}
}
Modified: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java?view=diff&rev=521717&r1=521716&r2=521717
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java Fri Mar 23 06:13:46 2007
@@ -17,18 +17,17 @@
package org.apache.lucene.benchmark.byTask;
+import junit.framework.TestCase;
+import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
+import org.apache.lucene.benchmark.byTask.tasks.TaskSequence;
+import org.apache.lucene.benchmark.byTask.utils.Algorithm;
+
import java.io.File;
import java.io.StringReader;
import java.lang.reflect.Modifier;
import java.util.ArrayList;
import java.util.Iterator;
-import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
-import org.apache.lucene.benchmark.byTask.tasks.TaskSequence;
-import org.apache.lucene.benchmark.byTask.utils.Algorithm;
-
-import junit.framework.TestCase;
-
/**
* Test very simply that perf tasks are parses as expected.
*/
@@ -96,6 +95,8 @@
tsks.add( " SetProp " );
tsks.add( " SetProp(name,value) " );
tsks.add( " Warm " );
+ tsks.add( "SearchTravRetLoadFieldSelector");
+ tsks.add("SearchTravRetLoadFieldSelector(body,title)");
// if tasks.dir property is defined, look for additional tasks.
// this somewhat covers tasks that would be added in the future, in case