You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by gs...@apache.org on 2007/03/23 14:13:47 UTC

svn commit: r521717 - in /lucene/java/trunk/contrib/benchmark/src: java/org/apache/lucene/benchmark/byTask/feeds/ java/org/apache/lucene/benchmark/byTask/tasks/ test/org/apache/lucene/benchmark/byTask/

Author: gsingers
Date: Fri Mar 23 06:13:46 2007
New Revision: 521717

URL: http://svn.apache.org/viewvc?view=rev&rev=521717
Log:
LUCENE-837:
Fixed issues related to Doron's comments on Mar. 22, 07.

Put in tests for ***FieldSelectorTask.  Moved the byte field handling from ReutersDocMaker to BasicDocMaker.

Added in tests for new Task

Modified:
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java
    lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java?view=diff&rev=521717&r1=521716&r2=521717
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java Fri Mar 23 06:13:46 2007
@@ -24,6 +24,7 @@
 import org.apache.lucene.document.Field;
 
 import java.io.File;
+import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.Iterator;
@@ -34,17 +35,24 @@
  * Create documents for the test.
  * Maintains counters of chars etc. so that sub-classes just need to 
  * provide textual content, and the create-by-size is handled here.
+ *
+ * <p/>
+ * Config Params (default is in caps):
+ * doc.stored=true|FALSE<br/>
+ * doc.tokenized=TRUE|false<br/>
+ * doc.term.vector=true|FALSE<br/>
+ * doc.store.body.bytes=true|FALSE //Store the body contents as raw UTF-8 bytes in a field<br/>
  */
 public abstract class BasicDocMaker implements DocMaker {
   
   private int numDocsCreated = 0;
-  
+  private boolean storeBytes = false;
+
   static class DocData {
     String name;
     Date date;
     String title;
     String body;
-    byte [] bytes;
     Properties props;
   }
   
@@ -91,7 +99,7 @@
   // create a doc
   // use only part of the body, modify it to keep the rest (or use all if size==0).
   // reset the docdata properties so they are not added more than once.
-  private Document createDocument(DocData docData, int size, int cnt) {
+  private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException {
     int docid = incrNumDocsCreated();
     Document doc = new Document();
     doc.add(new Field("docid", "doc"+docid, storeVal, indexVal, termVecVal));
@@ -123,11 +131,11 @@
         docData.body = docData.body.substring(size); // some left
       }
       doc.add(new Field(BODY_FIELD, bdy, storeVal, indexVal, termVecVal));
+      if (storeBytes == true) {
+        doc.add(new Field("bytes", bdy.getBytes("UTF-8"), Field.Store.YES));
+      }
     }
-    if (docData.bytes != null && docData.bytes.length != 0)
-    {
-      doc.add(new Field("bytes", docData.bytes, Field.Store.YES));
-    }
+
     if (docData.props!=null) {
       for (Iterator it = docData.props.keySet().iterator(); it.hasNext(); ) {
         String key = (String) it.next();
@@ -186,6 +194,7 @@
     storeVal = (stored ? Field.Store.YES : Field.Store.NO);
     indexVal = (tokenized ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED);
     termVecVal = (termVec ? Field.TermVector.YES : Field.TermVector.NO);
+    storeBytes = config.get("doc.store.body.bytes", false);
   }
 
   /*

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java?view=diff&rev=521717&r1=521716&r2=521717
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java Fri Mar 23 06:13:46 2007
@@ -33,7 +33,7 @@
  *
  * Config properties:
  * docs.dir=&lt;path to the docs dir| Default: reuters-out&gt;
- * reuters.doc.maker.store.bytes=true|false Default: false
+
  *
  */
 public class ReutersDocMaker extends BasicDocMaker {
@@ -43,7 +43,7 @@
   private ArrayList inputFiles = new ArrayList();
   private int nextFile = 0;
   private int iteration=0;
-  private boolean storeBytes = false;
+  
   /* (non-Javadoc)
    * @see SimpleDocMaker#setConfig(java.util.Properties)
    */
@@ -51,7 +51,7 @@
     super.setConfig(config);
     String d = config.get("docs.dir","reuters-out");
     dataDir = new File(new File("work"),d);
-    storeBytes = config.get("reuters.doc.maker.store.bytes", false);
+
 
     collectFiles(dataDir,inputFiles);
     if (inputFiles.size()==0) {
@@ -96,10 +96,6 @@
     dd.name = name;
     dd.title = title;
     dd.body = bodyBuf.toString();
-    if (storeBytes == true)
-    {
-      dd.bytes = dd.body.getBytes("UTF-8");
-    }
     return dd;
   }
 

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java?view=diff&rev=521717&r1=521716&r2=521717
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java Fri Mar 23 06:13:46 2007
@@ -30,8 +30,8 @@
  * <br>
  * Tasks performing some work that should be measured for the task, can overide setup() and/or tearDown() and 
  * placed that work there. 
- * <br>
- * Relevant properties: <code>task.max.depth.log</code>.
+ * <br>
+ * Relevant properties: <code>task.max.depth.log</code>.
  */
 public abstract class PerfTask implements Cloneable {
 
@@ -41,7 +41,7 @@
   private String name;
   private int depth = 0;
   private int maxDepthLogStart = 0;
-  private String params = null;
+  protected String params = null;
   
   protected static final String NEW_LINE = System.getProperty("line.separator");
 
@@ -202,24 +202,24 @@
   }
 
   /**
-   * Sub classes that supports parameters must overide this method to return true.
-   * @return true iff this task supports command line params.
-   */
-  public boolean supportsParams () {
-    return false;
-  }
-  
-  /**
+   * Subclasses that support parameters must override this method to return true.
+   * @return true iff this task supports command line params.
+   */
+  public boolean supportsParams () {
+    return false;
+  }
+  
+  /**
    * Set the params of this task.
-   * @exception UnsupportedOperationException for tasks supporting command line parameters.
+   * @exception UnsupportedOperationException for tasks not supporting command line parameters.
    */
   public void setParams(String params) {
-    if (!supportsParams()) {
-      throw new UnsupportedOperationException(getName()+" does not support command line parameters.");
-    }
+    if (!supportsParams()) {
+      throw new UnsupportedOperationException(getName()+" does not support command line parameters.");
+    }
     this.params = params;
   }
-  
+  
   /**
    * @return Returns the Params.
    */

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java?view=diff&rev=521717&r1=521716&r2=521717
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java Fri Mar 23 06:13:46 2007
@@ -53,11 +53,20 @@
   }
 
   public void setParams(String params) {
+    this.params = params;
     Set fieldsToLoad = new HashSet();
     for (StringTokenizer tokenizer = new StringTokenizer(params, ","); tokenizer.hasMoreTokens();) {
       String s = tokenizer.nextToken();
       fieldsToLoad.add(s);
     }
     fieldSelector = new SetBasedFieldSelector(fieldsToLoad, Collections.EMPTY_SET);
+  }
+
+
+  /* (non-Javadoc)
+  * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams()
+  */
+  public boolean supportsParams() {
+    return true;
   }
 }

Modified: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java?view=diff&rev=521717&r1=521716&r2=521717
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java Fri Mar 23 06:13:46 2007
@@ -17,18 +17,17 @@
 
 package org.apache.lucene.benchmark.byTask;
 
+import junit.framework.TestCase;
+import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
+import org.apache.lucene.benchmark.byTask.tasks.TaskSequence;
+import org.apache.lucene.benchmark.byTask.utils.Algorithm;
+
 import java.io.File;
 import java.io.StringReader;
 import java.lang.reflect.Modifier;
 import java.util.ArrayList;
 import java.util.Iterator;
 
-import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
-import org.apache.lucene.benchmark.byTask.tasks.TaskSequence;
-import org.apache.lucene.benchmark.byTask.utils.Algorithm;
-
-import junit.framework.TestCase;
-
 /**
  * Test very simply that perf tasks are parses as expected.
  */
@@ -96,6 +95,8 @@
     tsks.add(  " SetProp                  "  );
     tsks.add(  " SetProp(name,value)      "  );
     tsks.add(  " Warm                     "  );
+    tsks.add(  "SearchTravRetLoadFieldSelector");
+    tsks.add("SearchTravRetLoadFieldSelector(body,title)");
     
     // if tasks.dir property is defined, look for additional tasks.
     // this somewhat covers tasks that would be added in the future, in case