You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by do...@apache.org on 2011/10/09 20:01:37 UTC

svn commit: r1180674 - in /lucene/dev/trunk: ./ lucene/ lucene/contrib/ modules/benchmark/ modules/benchmark/conf/ modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/ modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ modul...

Author: doronc
Date: Sun Oct  9 18:01:36 2011
New Revision: 1180674

URL: http://svn.apache.org/viewvc?rev=1180674&view=rev
Log:
LUCENE-3262: Facet benchmarking - indexing support - ported from 3x.

Added:
    lucene/dev/trunk/modules/benchmark/conf/facets.alg
      - copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/conf/facets.alg
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java
      - copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java
      - copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java
      - copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddFacetedDocTask.java
      - copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddFacetedDocTask.java
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java
      - copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java
      - copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitTaxonomyIndexTask.java
      - copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitTaxonomyIndexTask.java
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java
      - copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java
      - copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java
      - copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java
Modified:
    lucene/dev/trunk/   (props changed)
    lucene/dev/trunk/lucene/   (props changed)
    lucene/dev/trunk/lucene/contrib/contrib-build.xml
    lucene/dev/trunk/modules/benchmark/CHANGES.txt
    lucene/dev/trunk/modules/benchmark/build.xml
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
    lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
    lucene/dev/trunk/solr/   (props changed)

Modified: lucene/dev/trunk/lucene/contrib/contrib-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/contrib-build.xml?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/contrib-build.xml (original)
+++ lucene/dev/trunk/lucene/contrib/contrib-build.xml Sun Oct  9 18:01:36 2011
@@ -141,6 +141,17 @@
     <property name="analyzers-common.uptodate" value="true"/>
   </target>
 
+  <property name="facet.jar" value="${common.dir}/../modules/facet/build/lucene-facet-${version}.jar"/>
+  <target name="check-facet-uptodate" unless="facet.uptodate">
+    <module-uptodate name="facet" jarfile="${facet.jar}" property="facet.uptodate"/>
+  </target>
+  <target name="jar-facet" unless="facet.uptodate" depends="check-facet-uptodate">
+    <ant dir="${common.dir}/../modules/facet" target="jar-core" inheritall="false">
+      <propertyset refid="uptodate.and.compiled.properties"/>
+    </ant>
+    <property name="facet.uptodate" value="true"/>
+  </target>
+
   <property name="analyzers-icu.jar" value="${common.dir}/../modules/analysis/build/icu/lucene-analyzers-icu-${version}.jar"/>
   <target name="check-analyzers-icu-uptodate" unless="analyzers-icu.uptodate">
     <module-uptodate name="analysis/icu" jarfile="${analyzers-icu.jar}" property="analyzers-icu.uptodate"/>

Modified: lucene/dev/trunk/modules/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/CHANGES.txt?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/CHANGES.txt (original)
+++ lucene/dev/trunk/modules/benchmark/CHANGES.txt Sun Oct  9 18:01:36 2011
@@ -5,6 +5,10 @@ The Benchmark contrib package contains c
 For more information on past and future Lucene versions, please see:
 http://s.apache.org/luceneversions
 
+10/07/2011
+  LUCENE-3262: Facet benchmarking - Benchmark tasks and sources were added for indexing
+  with facets, demonstrated in facets.alg. (Gilad Barkai, Doron Cohen)
+    
 09/25/2011
   LUCENE-3457: Upgrade commons-compress to 1.2 (and undo LUCENE-2980's workaround).
   (Doron Cohen)

Modified: lucene/dev/trunk/modules/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/build.xml?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/build.xml (original)
+++ lucene/dev/trunk/modules/benchmark/build.xml Sun Oct  9 18:01:36 2011
@@ -153,6 +153,7 @@
       <pathelement path="${highlighter.jar}"/>
       <pathelement path="${analyzers-common.jar}"/>
       <pathelement path="${queryparser.jar}"/>
+      <pathelement path="${facet.jar}"/>
       <path refid="base.classpath"/>
     	<fileset dir="lib">
     		<include name="**/*.jar"/>
@@ -241,7 +242,7 @@
       <echo>Benchmark output in JIRA table format is in file: ${shingle.jira.output.file}</echo>
     </target>
 
-    <target name="init" depends="contrib-build.init,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser"/>
+    <target name="init" depends="contrib-build.init,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet"/>
   
     <target name="clean-javacc">
       <fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">

Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java Sun Oct  9 18:01:36 2011
@@ -24,6 +24,7 @@ import java.util.Locale;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
+import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
 import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
 import org.apache.lucene.benchmark.byTask.stats.Points;
 import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
@@ -31,12 +32,15 @@ import org.apache.lucene.benchmark.byTas
 import org.apache.lucene.benchmark.byTask.utils.Config;
 import org.apache.lucene.benchmark.byTask.utils.FileUtils;
 import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.IOUtils;
 
 /**
  * Data maintained by a performance test run.
@@ -45,11 +49,21 @@ import org.apache.lucene.store.RAMDirect
  * <ul>
  *  <li>Configuration.
  *  <li>Directory, Writer, Reader.
- *  <li>Docmaker and a few instances of QueryMaker.
+ *  <li>Taxonomy Directory, Writer, Reader.
+ *  <li>DocMaker, FacetSource and a few instances of QueryMaker.
  *  <li>Analyzer.
  *  <li>Statistics data which updated during the run.
  * </ul>
- * Config properties: work.dir=&lt;path to root of docs and index dirs| Default: work&gt;
+ * Config properties:
+ * <ul>
+ *  <li><b>work.dir</b>=&lt;path to root of docs and index dirs| Default: work&gt;
+ *  <li><b>analyzer</b>=&lt;class name for analyzer| Default: StandardAnalyzer&gt;
+ *  <li><b>doc.maker</b>=&lt;class name for doc-maker| Default: DocMaker&gt;
+ *  <li><b>facet.source</b>=&lt;class name for facet-source| Default: RandomFacetSource&gt;
+ *  <li><b>query.maker</b>=&lt;class name for query-maker| Default: SimpleQueryMaker&gt;
+ *  <li><b>log.queries</b>=&lt;whether queries should be printed| Default: false&gt;
+ *  <li><b>directory</b>=&lt;type of directory to use for the index| Default: RAMDirectory&gt;
+ *  <li><b>taxonomy.directory</b>=&lt;type of directory for taxonomy index| Default: RAMDirectory&gt;
  * </ul>
  */
 public class PerfRunData {
@@ -62,7 +76,12 @@ public class PerfRunData {
   private Directory directory;
   private Analyzer analyzer;
   private DocMaker docMaker;
+  private FacetSource facetSource;
   private Locale locale;
+
+  private Directory taxonomyDir;
+  private TaxonomyWriter taxonomyWriter;
+  private TaxonomyReader taxonomyReader;
   
   // we use separate (identical) instances for each "read" task type, so each can iterate the quries separately.
   private HashMap<Class<? extends ReadTask>,QueryMaker> readTaskQueryMaker;
@@ -73,6 +92,7 @@ public class PerfRunData {
   private IndexWriter indexWriter;
   private Config config;
   private long startTimeMillis;
+
   
   // constructor
   public PerfRunData (Config config) throws Exception {
@@ -84,6 +104,10 @@ public class PerfRunData {
     docMaker = Class.forName(config.get("doc.maker",
         "org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance();
     docMaker.setConfig(config);
+    // facet source
+    facetSource = Class.forName(config.get("facet.source",
+        "org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance();
+    facetSource.setConfig(config);
     // query makers
     readTaskQueryMaker = new HashMap<Class<? extends ReadTask>,QueryMaker>();
     qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker")).asSubclass(QueryMaker.class);
@@ -104,30 +128,17 @@ public class PerfRunData {
   public void reinit(boolean eraseIndex) throws Exception {
 
     // cleanup index
-    if (indexWriter!=null) {
-      indexWriter.close();
-      indexWriter = null;
-    }
-    if (indexReader!=null) {
-      indexReader.close();
-      indexReader = null;
-    }
-    if (directory!=null) {
-      directory.close();
-    }
+    IOUtils.close(indexWriter, indexReader, directory);
+    indexWriter = null;
+    indexReader = null;
+
+    IOUtils.close(taxonomyWriter, taxonomyReader, taxonomyDir);
+    taxonomyWriter = null;
+    taxonomyReader = null;
     
     // directory (default is ram-dir).
-    if ("FSDirectory".equals(config.get("directory","RAMDirectory"))) {
-      File workDir = new File(config.get("work.dir","work"));
-      File indexDir = new File(workDir,"index");
-      if (eraseIndex && indexDir.exists()) {
-        FileUtils.fullyDelete(indexDir);
-      }
-      indexDir.mkdirs();
-      directory = FSDirectory.open(indexDir);
-    } else {
-      directory = new RAMDirectory();
-    }
+    directory = createDirectory(eraseIndex, "index", "directory");
+    taxonomyDir = createDirectory(eraseIndex, "taxo", "taxonomy.directory");
 
     // inputs
     resetInputs();
@@ -139,6 +150,21 @@ public class PerfRunData {
     // Re-init clock
     setStartTimeMillis();
   }
+
+  private Directory createDirectory(boolean eraseIndex, String dirName,
+      String dirParam) throws IOException {
+    if ("FSDirectory".equals(config.get(dirParam,"RAMDirectory"))) {
+      File workDir = new File(config.get("work.dir","work"));
+      File indexDir = new File(workDir,dirName);
+      if (eraseIndex && indexDir.exists()) {
+        FileUtils.fullyDelete(indexDir);
+      }
+      indexDir.mkdirs();
+      return FSDirectory.open(indexDir);
+    } 
+
+    return new RAMDirectory();
+  }
   
   public long setStartTimeMillis() {
     startTimeMillis = System.currentTimeMillis();
@@ -174,6 +200,57 @@ public class PerfRunData {
   }
 
   /**
+   * @return Returns the taxonomy directory
+   */
+  public Directory getTaxonomyDir() {
+    return taxonomyDir;
+  }
+  
+  /**
+   * Set the taxonomy reader. This object acquires its own reference to that
+   * taxonomy reader, that is, it internally performs taxoReader.incRef(). If the
+   * caller no longer needs that reader, it should decRef()/close() it after
+   * calling this method; otherwise, the reader will remain open.
+   * @param taxoReader The taxonomy reader to set.
+   */
+  public synchronized void setTaxonomyReader(TaxonomyReader taxoReader) throws IOException {
+    if (taxoReader == this.taxonomyReader) {
+      return;
+    }
+    if (taxonomyReader != null) {
+      taxonomyReader.decRef();
+    }
+    
+    if (taxoReader != null) {
+      taxoReader.incRef();
+    }
+    this.taxonomyReader = taxoReader;
+  }
+  
+  /**
+   * @return Returns the taxonomyReader.  NOTE: this returns a
+   * reference.  You must call TaxonomyReader.decRef() when
+   * you're done.
+   */
+  public synchronized TaxonomyReader getTaxonomyReader() {
+    if (taxonomyReader != null) {
+      taxonomyReader.incRef();
+    }
+    return taxonomyReader;
+  }
+  
+  /**
+   * @param taxoWriter The taxonomy writer to set.
+   */
+  public void setTaxonomyWriter(TaxonomyWriter taxoWriter) {
+    this.taxonomyWriter = taxoWriter;
+  }
+  
+  public TaxonomyWriter getTaxonomyWriter() {
+    return taxonomyWriter;
+  }
+  
+  /**
    * @return Returns the indexReader.  NOTE: this returns a
    * reference.  You must call IndexReader.decRef() when
    * you're done.
@@ -198,13 +275,22 @@ public class PerfRunData {
   }
 
   /**
+   * Set the index reader. This object acquires its own reference to that
+   * index reader, that is, it internally performs indexReader.incRef(). If the
+   * caller no longer needs that reader, it should decRef()/close() it after
+   * calling this method; otherwise, the reader will remain open.
    * @param indexReader The indexReader to set.
    */
   public synchronized void setIndexReader(IndexReader indexReader) throws IOException {
+    if (indexReader == this.indexReader) {
+      return;
+    }
+    
     if (this.indexReader != null) {
       // Release current IR
       this.indexReader.decRef();
     }
+
     this.indexReader = indexReader;
     if (indexReader != null) {
       // Hold reference to new IR
@@ -246,6 +332,11 @@ public class PerfRunData {
     return docMaker;
   }
 
+  /** Returns the facet source. */
+  public FacetSource getFacetSource() {
+    return facetSource;
+  }
+
   /**
    * @return the locale
    */
@@ -269,6 +360,7 @@ public class PerfRunData {
 
   public void resetInputs() throws IOException {
     docMaker.resetInputs();
+    facetSource.resetInputs();
     for (final QueryMaker queryMaker : readTaskQueryMaker.values()) {
       queryMaker.resetInputs();
     }

Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java Sun Oct  9 18:01:36 2011
@@ -17,12 +17,7 @@ package org.apache.lucene.benchmark.byTa
  * limitations under the License.
  */
 
-import java.io.File;
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-
-import org.apache.lucene.benchmark.byTask.utils.Config;
 
 /**
  * Represents content from a specified source, such as TREC, Reuters etc. A
@@ -31,119 +26,13 @@ import org.apache.lucene.benchmark.byTas
  * of various statistics, such as how many documents were generated, size in
  * bytes etc.
  * <p>
- * Supports the following configuration parameters:
- * <ul>
- * <li><b>content.source.forever</b> - specifies whether to generate documents
- * forever (<b>default=true</b>).
- * <li><b>content.source.verbose</b> - specifies whether messages should be
- * output by the content source (<b>default=false</b>).
- * <li><b>content.source.encoding</b> - specifies which encoding to use when
- * reading the files of that content source. Certain implementations may define
- * a default value if this parameter is not specified. (<b>default=null</b>).
- * <li><b>content.source.log.step</b> - specifies for how many documents a
- * message should be logged. If set to 0 it means no logging should occur.
- * <b>NOTE:</b> if verbose is set to false, logging should not occur even if
- * logStep is not 0 (<b>default=0</b>).
- * </ul>
+ * For supported configuration parameters see {@link ContentItemsSource}.
  */
-public abstract class ContentSource {
-  
-  private long bytesCount;
-  private long totalBytesCount;
-  private int docsCount;
-  private int totalDocsCount;
-  private Config config;
-
-  protected boolean forever;
-  protected int logStep;
-  protected boolean verbose;
-  protected String encoding;
-  
-  /** update count of bytes generated by this source */  
-  protected final synchronized void addBytes(long numBytes) {
-    bytesCount += numBytes;
-    totalBytesCount += numBytes;
-  }
-  
-  /** update count of documents generated by this source */  
-  protected final synchronized void addDoc() {
-    ++docsCount;
-    ++totalDocsCount;
-  }
-
-  /**
-   * A convenience method for collecting all the files of a content source from
-   * a given directory. The collected {@link File} instances are stored in the
-   * given <code>files</code>.
-   */
-  protected final void collectFiles(File dir, ArrayList<File> files) {
-    if (!dir.canRead()) {
-      return;
-    }
-    
-    File[] dirFiles = dir.listFiles();
-    Arrays.sort(dirFiles);
-    for (int i = 0; i < dirFiles.length; i++) {
-      File file = dirFiles[i];
-      if (file.isDirectory()) {
-        collectFiles(file, files);
-      } else if (file.canRead()) {
-        files.add(file);
-      }
-    }
-  }
-
-	/**
-   * Returns true whether it's time to log a message (depending on verbose and
-   * the number of documents generated).
-   */
-  protected final boolean shouldLog() {
-    return verbose && logStep > 0 && docsCount % logStep == 0;
-  }
-
-  /** Called when reading from this content source is no longer required. */
-  public abstract void close() throws IOException;
+public abstract class ContentSource extends ContentItemsSource {
   
-  /** Returns the number of bytes generated since last reset. */
-  public final long getBytesCount() { return bytesCount; }
-
-  /** Returns the number of generated documents since last reset. */
-  public final int getDocsCount() { return docsCount; }
-  
-  public final Config getConfig() { return config; }
-
-  /** Returns the next {@link DocData} from the content source. */
+  /** Returns the next {@link DocData} from the content source. 
+   * Implementations must account for multi-threading, as multiple threads 
+   * can call this method simultaneously. */
   public abstract DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException;
 
-  /** Returns the total number of bytes that were generated by this source. */ 
-  public final long getTotalBytesCount() { return totalBytesCount; }
-
-  /** Returns the total number of generated documents. */
-  public final int getTotalDocsCount() { return totalDocsCount; }
-
-  /**
-   * Resets the input for this content source, so that the test would behave as
-   * if it was just started, input-wise.
-   * <p>
-   * <b>NOTE:</b> the default implementation resets the number of bytes and
-   * documents generated since the last reset, so it's important to call
-   * super.resetInputs in case you override this method.
-   */
-  public void resetInputs() throws IOException {
-    bytesCount = 0;
-    docsCount = 0;
-  }
-
-  /**
-   * Sets the {@link Config} for this content source. If you override this
-   * method, you must call super.setConfig.
-   */
-  public void setConfig(Config config) {
-    this.config = config;
-    forever = config.get("content.source.forever", true);
-    logStep = config.get("content.source.log.step", 0);
-    verbose = config.get("content.source.verbose", false);
-    encoding = config.get("content.source.encoding", null);
-  }
-
 }

Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java Sun Oct  9 18:01:36 2011
@@ -31,7 +31,6 @@ import java.text.SimpleDateFormat;
 import java.text.ParsePosition;
 
 import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.benchmark.byTask.utils.Format;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -186,13 +185,8 @@ public class DocMaker {
   protected boolean reuseFields;
   protected boolean indexProperties;
   
-  private int lastPrintedNumUniqueTexts = 0;
-
-  private long lastPrintedNumUniqueBytes = 0;
   private final AtomicInteger numDocsCreated = new AtomicInteger();
 
-  private int printNum = 0;
-
   public DocMaker() {
   }
   
@@ -400,38 +394,9 @@ public class DocMaker {
     return doc;
   }
   
-  public void printDocStatistics() {
-    boolean print = false;
-    String col = "                  ";
-    StringBuilder sb = new StringBuilder();
-    String newline = System.getProperty("line.separator");
-    sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline);
-    int nut = source.getTotalDocsCount();
-    if (nut > lastPrintedNumUniqueTexts) {
-      print = true;
-      sb.append("total count of unique texts: ").append(Format.format(0,nut,col)).append(newline);
-      lastPrintedNumUniqueTexts = nut;
-    }
-    long nub = getTotalBytesCount();
-    if (nub > lastPrintedNumUniqueBytes) {
-      print = true;
-      sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline);
-      lastPrintedNumUniqueBytes = nub;
-    }
-    if (source.getDocsCount() > 0) {
-      print = true;
-      sb.append("num docs added since last inputs reset:   ").append(Format.format(0,source.getDocsCount(),col)).append(newline);
-      sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getBytesCount(),col)).append(newline);
-    }
-    if (print) {
-      System.out.println(sb.append(newline).toString());
-      printNum++;
-    }
-  }
-  
   /** Reset inputs so that the test run would behave, input wise, as if it just started. */
   public synchronized void resetInputs() throws IOException {
-    printDocStatistics();
+    source.printStatistics("docs");
     // re-initiate since properties by round may have changed.
     setConfig(config);
     source.resetInputs();

Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java Sun Oct  9 18:01:36 2011
@@ -289,7 +289,7 @@ public class TrecContentSource extends C
     // here, everything else is already private to that thread, so we're safe.
     try {
       docData = trecDocParser.parse(docData, name, this, docBuf, parsedPathType);
-      addDoc();
+      addItem();
     } catch (InterruptedException ie) {
       throw new ThreadInterruptedException(ie);
     }

Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java Sun Oct  9 18:01:36 2011
@@ -17,12 +17,14 @@ package org.apache.lucene.benchmark.byTa
  * limitations under the License.
  */
 
+import java.text.NumberFormat;
+
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
 import org.apache.lucene.document.Document;
 
 /**
- * Add a document, optionally with of a certain size.
+ * Add a document, optionally of a certain size.
  * <br>Other side effects: none.
  * <br>Takes optional param: document size. 
  */
@@ -34,9 +36,12 @@ public class AddDocTask extends PerfTask
 
   private int docSize = 0;
   
-  // volatile data passed between setup(), doLogic(), tearDown().
-  private Document doc = null;
-  
+  /** 
+   * Volatile data passed between setup(), doLogic(), tearDown().
+   * The doc is created in setup() and added in doLogic().
+   */
+  protected Document doc = null;
+
   @Override
   public void setup() throws Exception {
     super.setup();
@@ -56,7 +61,7 @@ public class AddDocTask extends PerfTask
 
   @Override
   protected String getLogMessage(int recsCount) {
-    return "added " + recsCount + " docs";
+    return String.format("added %9d docs",recsCount);
   }
   
   @Override

Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java Sun Oct  9 18:01:36 2011
@@ -17,8 +17,6 @@ package org.apache.lucene.benchmark.byTa
  * limitations under the License.
  */
 
-import java.text.NumberFormat;
-
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.stats.Points;
 import org.apache.lucene.benchmark.byTask.stats.TaskStats;
@@ -270,9 +268,7 @@ public abstract class PerfTask implement
   public void tearDown() throws Exception {
     if (++logStepCount % logStep == 0) {
       double time = (System.currentTimeMillis() - runData.getStartTimeMillis()) / 1000.0;
-      NumberFormat nf = NumberFormat.getInstance();
-      nf.setMaximumFractionDigits(2);
-      System.out.println(nf.format(time) + " sec --> "
+      System.out.println(String.format("%7.2f",time) + " sec --> "
           + Thread.currentThread().getName() + " " + getLogMessage(logStepCount));
     }
   }

Modified: lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Sun Oct  9 18:01:36 2011
@@ -40,6 +40,7 @@ import org.apache.lucene.benchmark.byTas
 import org.apache.lucene.collation.CollationKeyAnalyzer;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@@ -781,6 +782,42 @@ public class TestPerfTasksLogic extends 
   }
 
   /**
+   * Test indexing with facets tasks.
+   */
+  public void testIndexingWithFacets() throws Exception {
+    // 1. alg definition (required in every "logic" test)
+    String algLines[] = {
+        "# ----- properties ",
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters20LinesFile(),
+        "content.source.log.step=100",
+        "content.source.forever=false",
+        "directory=RAMDirectory",
+        "doc.stored=false",
+        "merge.factor=3",
+        "doc.tokenized=false",
+        "debug.level=1",
+        "# ----- alg ",
+        "ResetSystemErase",
+        "CreateIndex",
+        "CreateTaxonomyIndex",
+        "{ \"AddDocs\"  AddFacetedDoc > : * ",
+        "CloseIndex",
+        "CloseTaxonomyIndex",
+        "OpenTaxonomyReader",
+    };
+
+    // 2. execute the algorithm  (required in every "logic" test)
+    Benchmark benchmark = execBenchmark(algLines);
+    PerfRunData runData = benchmark.getRunData();
+    assertNull("taxo writer was not properly closed",runData.getTaxonomyWriter());
+    TaxonomyReader taxoReader = runData.getTaxonomyReader();
+    assertNotNull("taxo reader was not opened", taxoReader);
+    assertTrue("nothing was added to the taxonomy (expecting root and at least one additional category)",taxoReader.getSize()>1);
+    taxoReader.close();
+  }
+  
+  /**
    * Test that we can call optimize(maxNumSegments).
    */
   public void testOptimizeMaxNumSegments() throws Exception {