You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by ot...@apache.org on 2007/04/03 01:37:17 UTC
svn commit: r524969 [1/5] - in /lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark: ./ byTask/ byTask/feeds/ byTask/programmatic/ byTask/stats/ byTask/tasks/ byTask/utils/ standard/ stats/ utils/

Author: otis
Date: Mon Apr  2 16:37:14 2007
New Revision: 524969

URL: http://svn.apache.org/viewvc?view=rev&rev=524969
Log:
- find . -name \*java -type f | xargs perl -pi -e 's/(\r\n|\n|\r)/\n/g' \
  && find . -name \*java -type f | xargs svn propset svn:eol-style native


Modified:
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/AbstractBenchmarker.java   (props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/BenchmarkOptions.java   (props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/Benchmarker.java   (props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/Constants.java   (props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/Driver.java   (props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleSloppyPhraseQueryMaker.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/Sample.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/Points.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/Report.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/TaskStats.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ClearStatsTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseIndexTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseReaderTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteDocTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewRoundTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenReaderTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OptimizeTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepAllTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepSelectByPrefTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameRoundTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefRoundTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReportTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ResetInputsTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemEraseTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemSoftTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SetPropTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WarmTask.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Config.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/FileUtils.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/standard/ReutersQueries.java   (props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/standard/StandardBenchmarker.java   (props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/standard/StandardOptions.java   (props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/stats/MemUsage.java   (props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/stats/QueryData.java   (props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/stats/TestData.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/stats/TestRunData.java   (props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/stats/TimeData.java   (contents, props changed)
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java   (props changed)

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/AbstractBenchmarker.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/BenchmarkOptions.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/Benchmarker.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/Constants.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/Driver.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java?view=diff&rev=524969&r1=524968&r2=524969
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java Mon Apr  2 16:37:14 2007
@@ -1,134 +1,134 @@
-package org.apache.lucene.benchmark.byTask;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
-import java.io.FileReader;
-import java.io.Reader;
-
-import org.apache.lucene.benchmark.byTask.utils.Algorithm;
-import org.apache.lucene.benchmark.byTask.utils.Config;
-
-
-/**
- * Run the benchmark algorithm.
- * <p>Usage: java Benchmark  algorithm-file
- * <ol>
- * <li>Read algorithm.</li>
- * <li> Run the algorithm.</li>
- * </ol>
- * Things to be added/fixed in "Benchmarking by tasks":
- * <ol>
- * <li>TODO - report into Excel and/or graphed view.</li>
- * <li>TODO - perf comparison between Lucene releases over the years.</li>
- * <li>TODO - perf report adequate to include in Lucene nightly build site? (so we can easily track performance changes.)</li>
- * <li>TODO - add overall time control for repeated execution (vs. current by-count only).</li>
- * <li>TODO - query maker that is based on index statistics.</li>
- * </ol>
- */
-public class Benchmark {
-
-  private PerfRunData runData;
-  private Algorithm algorithm;
-  private boolean executed;
-  
-  public Benchmark (Reader algReader) throws Exception {
-    // prepare run data
-    try {
-      runData = new PerfRunData(new Config(algReader));
-    } catch (Exception e) {
-      e.printStackTrace();
-      throw new Exception("Error: cannot init PerfRunData!",e);
-    }
-    
-    // parse algorithm
-    try {
-      algorithm = new Algorithm(runData);
-    } catch (Exception e) {
-      throw new Exception("Error: cannot understand algorithm!",e);
-    }
-  }
-  
-  public synchronized void  execute() throws Exception {
-    if (executed) {
-      throw new IllegalStateException("Benchmark was already executed");
-    }
-    executed = true;
-    algorithm.execute();
-  }
-  
-  /**
-   * Run the benchmark algorithm.
-   * @param args benchmark config and algorithm files
-   */
-  public static void main(String[] args) {
-    // verify command line args
-    if (args.length < 1) {
-      System.err.println("Usage: java Benchmark <algorithm file>");
-      System.exit(1);
-    }
-    
-    // verify input files 
-    File algFile = new File(args[0]);
-    if (!algFile.exists() || !algFile.isFile() || !algFile.canRead()) {
-      System.err.println("cannot find/read algorithm file: "+algFile.getAbsolutePath()); 
-      System.exit(1);
-    }
-    
-    System.out.println("Running algorithm from: "+algFile.getAbsolutePath());
-    
-    Benchmark benchmark = null;
-    try {
-      benchmark = new Benchmark(new FileReader(algFile));
-    } catch (Exception e) {
-      e.printStackTrace();
-      System.exit(1);
-    }
-
-    System.out.println("------------> algorithm:");
-    System.out.println(benchmark.getAlgorithm().toString());
-
-    // execute
-    try {
-      benchmark.execute();
-    } catch (Exception e) {
-      System.err.println("Error: cannot execute the algorithm! "+e.getMessage());
-      e.printStackTrace();
-    }
-
-    System.out.println("####################");
-    System.out.println("###  D O N E !!! ###");
-    System.out.println("####################");
-
-  }
-
-  /**
-   * @return Returns the algorithm.
-   */
-  public Algorithm getAlgorithm() {
-    return algorithm;
-  }
-
-  /**
-   * @return Returns the runData.
-   */
-  public PerfRunData getRunData() {
-    return runData;
-  }
-
-}
+package org.apache.lucene.benchmark.byTask;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.Reader;
+
+import org.apache.lucene.benchmark.byTask.utils.Algorithm;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+
+
+/**
+ * Run the benchmark algorithm.
+ * <p>Usage: java Benchmark  algorithm-file
+ * <ol>
+ * <li>Read algorithm.</li>
+ * <li> Run the algorithm.</li>
+ * </ol>
+ * Things to be added/fixed in "Benchmarking by tasks":
+ * <ol>
+ * <li>TODO - report into Excel and/or graphed view.</li>
+ * <li>TODO - perf comparison between Lucene releases over the years.</li>
+ * <li>TODO - perf report adequate to include in Lucene nightly build site? (so we can easily track performance changes.)</li>
+ * <li>TODO - add overall time control for repeated execution (vs. current by-count only).</li>
+ * <li>TODO - query maker that is based on index statistics.</li>
+ * </ol>
+ */
+public class Benchmark {
+
+  private PerfRunData runData;
+  private Algorithm algorithm;
+  private boolean executed;
+  
+  public Benchmark (Reader algReader) throws Exception {
+    // prepare run data
+    try {
+      runData = new PerfRunData(new Config(algReader));
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw new Exception("Error: cannot init PerfRunData!",e);
+    }
+    
+    // parse algorithm
+    try {
+      algorithm = new Algorithm(runData);
+    } catch (Exception e) {
+      throw new Exception("Error: cannot understand algorithm!",e);
+    }
+  }
+  
+  public synchronized void  execute() throws Exception {
+    if (executed) {
+      throw new IllegalStateException("Benchmark was already executed");
+    }
+    executed = true;
+    algorithm.execute();
+  }
+  
+  /**
+   * Run the benchmark algorithm.
+   * @param args benchmark config and algorithm files
+   */
+  public static void main(String[] args) {
+    // verify command line args
+    if (args.length < 1) {
+      System.err.println("Usage: java Benchmark <algorithm file>");
+      System.exit(1);
+    }
+    
+    // verify input files 
+    File algFile = new File(args[0]);
+    if (!algFile.exists() || !algFile.isFile() || !algFile.canRead()) {
+      System.err.println("cannot find/read algorithm file: "+algFile.getAbsolutePath()); 
+      System.exit(1);
+    }
+    
+    System.out.println("Running algorithm from: "+algFile.getAbsolutePath());
+    
+    Benchmark benchmark = null;
+    try {
+      benchmark = new Benchmark(new FileReader(algFile));
+    } catch (Exception e) {
+      e.printStackTrace();
+      System.exit(1);
+    }
+
+    System.out.println("------------> algorithm:");
+    System.out.println(benchmark.getAlgorithm().toString());
+
+    // execute
+    try {
+      benchmark.execute();
+    } catch (Exception e) {
+      System.err.println("Error: cannot execute the algorithm! "+e.getMessage());
+      e.printStackTrace();
+    }
+
+    System.out.println("####################");
+    System.out.println("###  D O N E !!! ###");
+    System.out.println("####################");
+
+  }
+
+  /**
+   * @return Returns the algorithm.
+   */
+  public Algorithm getAlgorithm() {
+    return algorithm;
+  }
+
+  /**
+   * @return Returns the runData.
+   */
+  public PerfRunData getRunData() {
+    return runData;
+  }
+
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java?view=diff&rev=524969&r1=524968&r2=524969
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java Mon Apr  2 16:37:14 2007
@@ -1,222 +1,222 @@
-package org.apache.lucene.benchmark.byTask;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
+package org.apache.lucene.benchmark.byTask;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
 import java.util.HashMap;
 import java.util.Iterator;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
-import org.apache.lucene.benchmark.byTask.feeds.HTMLParser;
-import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
-import org.apache.lucene.benchmark.byTask.stats.Points;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
+import org.apache.lucene.benchmark.byTask.feeds.HTMLParser;
+import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
+import org.apache.lucene.benchmark.byTask.stats.Points;
 import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
 import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.benchmark.byTask.utils.FileUtils;
-
-
-/**
- * Data maintained by a performance test run.
- * <p>
- * Data includes:
- * <ul>
- *  <li>Configuration.
- *  <li>Directory, Writer, Reader.
- *  <li>Docmaker and a few instances of QueryMaker.
- *  <li>Analyzer.
- *  <li>Statistics data which updated during the run.
- * </ul>
- */
-public class PerfRunData {
-
-  private Points points;
-  
-  // objects used during performance test run
-  // directory, analyzer, docMaker - created at startup.
-  // reader, writer, searcher - maintained by basic tasks. 
-  private Directory directory;
-  private Analyzer analyzer;
-  private DocMaker docMaker;
-  private HTMLParser htmlParser;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.FileUtils;
+
+
+/**
+ * Data maintained by a performance test run.
+ * <p>
+ * Data includes:
+ * <ul>
+ *  <li>Configuration.
+ *  <li>Directory, Writer, Reader.
+ *  <li>Docmaker and a few instances of QueryMaker.
+ *  <li>Analyzer.
+ *  <li>Statistics data which updated during the run.
+ * </ul>
+ */
+public class PerfRunData {
+
+  private Points points;
+  
+  // objects used during performance test run
+  // directory, analyzer, docMaker - created at startup.
+  // reader, writer, searcher - maintained by basic tasks. 
+  private Directory directory;
+  private Analyzer analyzer;
+  private DocMaker docMaker;
+  private HTMLParser htmlParser;
   
   // we use separate (identical) instances for each "read" task type, so each can iterate the quries separately.
   private HashMap readTaskQueryMaker;
   private Class qmkrClass;
-
-  private IndexReader indexReader;
-  private IndexWriter indexWriter;
-  private Config config;
-  
-  // constructor
-  public PerfRunData (Config config) throws Exception {
-    this.config = config;
-    // analyzer (default is standard analyzer)
-    analyzer = (Analyzer) Class.forName(config.get("analyzer",
-        "org.apache.lucene.analysis.standard.StandardAnalyzer")).newInstance();
-    // doc maker
-    docMaker = (DocMaker) Class.forName(config.get("doc.maker",
-        "org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker")).newInstance();
-    docMaker.setConfig(config);
-    // query makers
+
+  private IndexReader indexReader;
+  private IndexWriter indexWriter;
+  private Config config;
+  
+  // constructor
+  public PerfRunData (Config config) throws Exception {
+    this.config = config;
+    // analyzer (default is standard analyzer)
+    analyzer = (Analyzer) Class.forName(config.get("analyzer",
+        "org.apache.lucene.analysis.standard.StandardAnalyzer")).newInstance();
+    // doc maker
+    docMaker = (DocMaker) Class.forName(config.get("doc.maker",
+        "org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker")).newInstance();
+    docMaker.setConfig(config);
+    // query makers
     readTaskQueryMaker = new HashMap();
-    qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker"));
-    // html parser, used for some doc makers
-    htmlParser = (HTMLParser) Class.forName(config.get("html.parser","org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser")).newInstance();
+    qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker"));
+    // html parser, used for some doc makers
+    htmlParser = (HTMLParser) Class.forName(config.get("html.parser","org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser")).newInstance();
     docMaker.setHTMLParser(htmlParser);
 
-    // index stuff
-    reinit(false);
-    
-    // statistic points
-    points = new Points(config);
-    
-    if (Boolean.valueOf(config.get("log.queries","false")).booleanValue()) {
-      System.out.println("------------> queries:");
+    // index stuff
+    reinit(false);
+    
+    // statistic points
+    points = new Points(config);
+    
+    if (Boolean.valueOf(config.get("log.queries","false")).booleanValue()) {
+      System.out.println("------------> queries:");
       System.out.println(getQueryMaker(new SearchTask(this)).printQueries());
-    }
-
-  }
-
-  // clean old stuff, reopen 
-  public void reinit(boolean eraseIndex) throws Exception {
-
-    // cleanup index
-    if (indexWriter!=null) {
-      indexWriter.close();
-      indexWriter = null;
-    }
-    if (indexReader!=null) {
-      indexReader.close();
-      indexReader = null;
-    }
-    if (directory!=null) {
-      directory.close();
-    }
-    
-    // directory (default is ram-dir).
-    if ("FSDirectory".equals(config.get("directory","RAMDirectory"))) {
-      File workDir = new File("work");
-      File indexDir = new File(workDir,"index");
-      if (eraseIndex && indexDir.exists()) {
-        FileUtils.fullyDelete(indexDir);
-      }
-      indexDir.mkdirs();
+    }
+
+  }
+
+  // clean old stuff, reopen 
+  public void reinit(boolean eraseIndex) throws Exception {
+
+    // cleanup index
+    if (indexWriter!=null) {
+      indexWriter.close();
+      indexWriter = null;
+    }
+    if (indexReader!=null) {
+      indexReader.close();
+      indexReader = null;
+    }
+    if (directory!=null) {
+      directory.close();
+    }
+    
+    // directory (default is ram-dir).
+    if ("FSDirectory".equals(config.get("directory","RAMDirectory"))) {
+      File workDir = new File("work");
+      File indexDir = new File(workDir,"index");
+      if (eraseIndex && indexDir.exists()) {
+        FileUtils.fullyDelete(indexDir);
+      }
+      indexDir.mkdirs();
       directory = FSDirectory.getDirectory(indexDir);
-    } else {
-      directory = new RAMDirectory();
-    }
-
-    // inputs
-    resetInputs();
-    
-    // release unused stuff
-    System.runFinalization();
-    System.gc();
-  }
-
-  /**
-   * @return Returns the points.
-   */
-  public Points getPoints() {
-    return points;
-  }
-
-  /**
-   * @return Returns the directory.
-   */
-  public Directory getDirectory() {
-    return directory;
-  }
-
-  /**
-   * @param directory The directory to set.
-   */
-  public void setDirectory(Directory directory) {
-    this.directory = directory;
-  }
-
-  /**
-   * @return Returns the indexReader.
-   */
-  public IndexReader getIndexReader() {
-    return indexReader;
-  }
-
-  /**
-   * @param indexReader The indexReader to set.
-   */
-  public void setIndexReader(IndexReader indexReader) {
-    this.indexReader = indexReader;
-  }
-
-  /**
-   * @return Returns the indexWriter.
-   */
-  public IndexWriter getIndexWriter() {
-    return indexWriter;
-  }
-
-  /**
-   * @param indexWriter The indexWriter to set.
-   */
-  public void setIndexWriter(IndexWriter indexWriter) {
-    this.indexWriter = indexWriter;
-  }
-
-  /**
-   * @return Returns the anlyzer.
-   */
-  public Analyzer getAnalyzer() {
-    return analyzer;
-  }
-
-  /**
-   * @return Returns the docMaker.
-   */
-  public DocMaker getDocMaker() {
-    return docMaker;
-  }
-
-  /**
-   * @return Returns the config.
-   */
-  public Config getConfig() {
-    return config;
-  }
-
-  public void resetInputs() {
-    docMaker.resetInputs();
+    } else {
+      directory = new RAMDirectory();
+    }
+
+    // inputs
+    resetInputs();
+    
+    // release unused stuff
+    System.runFinalization();
+    System.gc();
+  }
+
+  /**
+   * @return Returns the points.
+   */
+  public Points getPoints() {
+    return points;
+  }
+
+  /**
+   * @return Returns the directory.
+   */
+  public Directory getDirectory() {
+    return directory;
+  }
+
+  /**
+   * @param directory The directory to set.
+   */
+  public void setDirectory(Directory directory) {
+    this.directory = directory;
+  }
+
+  /**
+   * @return Returns the indexReader.
+   */
+  public IndexReader getIndexReader() {
+    return indexReader;
+  }
+
+  /**
+   * @param indexReader The indexReader to set.
+   */
+  public void setIndexReader(IndexReader indexReader) {
+    this.indexReader = indexReader;
+  }
+
+  /**
+   * @return Returns the indexWriter.
+   */
+  public IndexWriter getIndexWriter() {
+    return indexWriter;
+  }
+
+  /**
+   * @param indexWriter The indexWriter to set.
+   */
+  public void setIndexWriter(IndexWriter indexWriter) {
+    this.indexWriter = indexWriter;
+  }
+
+  /**
+   * @return Returns the anlyzer.
+   */
+  public Analyzer getAnalyzer() {
+    return analyzer;
+  }
+
+  /**
+   * @return Returns the docMaker.
+   */
+  public DocMaker getDocMaker() {
+    return docMaker;
+  }
+
+  /**
+   * @return Returns the config.
+   */
+  public Config getConfig() {
+    return config;
+  }
+
+  public void resetInputs() {
+    docMaker.resetInputs();
     Iterator it = readTaskQueryMaker.values().iterator();
     while (it.hasNext()) {
       ((QueryMaker) it.next()).resetInputs();
     }
-  }
-
-  /**
+  }
+
+  /**
    * @return Returns the queryMaker by read task type (class)
-   */
+   */
   public QueryMaker getQueryMaker(ReadTask readTask) {
     // mapping the query maker by task class allows extending/adding new search/read tasks
     // without needing to modify this class.
@@ -232,13 +232,13 @@
       readTaskQueryMaker.put(readTaskClass,qm);
     }
     return qm;
-  }
-
-  /**
-   * @return Returns the htmlParser.
-   */
-  public HTMLParser getHtmlParser() {
-    return htmlParser;
-  }
-
-}
+  }
+
+  /**
+   * @return Returns the htmlParser.
+   */
+  public HTMLParser getHtmlParser() {
+    return htmlParser;
+  }
+
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java?view=diff&rev=524969&r1=524968&r2=524969
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java Mon Apr  2 16:37:14 2007
@@ -1,309 +1,309 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.benchmark.byTask.utils.Format;
-import org.apache.lucene.document.DateTools;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
-import java.io.File;
-import java.io.UnsupportedEncodingException;
-import java.util.ArrayList;
-import java.util.Iterator;
-
-
-/**
- * Create documents for the test.
- * Maintains counters of chars etc. so that sub-classes just need to 
- * provide textual content, and the create-by-size is handled here.
- *
- * <p/>
- * Config Params (default is in caps):
- * doc.stored=true|FALSE<br/>
- * doc.tokenized=TRUE|false<br/>
- * doc.term.vector=true|FALSE<br/>
- * doc.store.body.bytes=true|FALSE //Store the body contents raw UTF-8 bytes as a field<br/>
- */
-public abstract class BasicDocMaker implements DocMaker {
-  
-  private int numDocsCreated = 0;
-  private boolean storeBytes = false;
-  protected boolean forever;
-
-  private static class LeftOver {
-    private DocData docdata;
-    private int cnt;
-  }
-
-  // leftovers are thread local, because it is unsafe to share residues between threads
-  private ThreadLocal leftovr = new ThreadLocal();
-
-  static final String BODY_FIELD = "body";
-  private long numBytes = 0;
-  private long numUniqueBytes = 0;
-
-  protected Config config;
-
-  protected Field.Store storeVal = Field.Store.NO;
-  protected Field.Index indexVal = Field.Index.TOKENIZED;
-  protected Field.TermVector termVecVal = Field.TermVector.NO;
-  
-  private synchronized int incrNumDocsCreated() {
-    return numDocsCreated++;
-  }
-
-  /**
-   * Return the data of the next document.
-   * All current implementations can create docs forever. 
-   * When the input data is exhausted, input files are iterated.
-   * This re-iteration can be avoided by setting doc.maker.forever to false (default is true).
-   * @return data of the next document.
-   * @exception if cannot create the next doc data
-   * @exception NoMoreDataException if data is exhausted (and 'forever' set to false).
-   */
-  protected abstract DocData getNextDocData() throws NoMoreDataException, Exception;
-
-  /*
-   *  (non-Javadoc)
-   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument()
-   */
-  public Document makeDocument () throws Exception {
-    resetLeftovers();
-    DocData docData = getNextDocData();
-    Document doc = createDocument(docData,0,-1);
-    return doc;
-  }
-
-  // create a doc
-  // use only part of the body, modify it to keep the rest (or use all if size==0).
-  // reset the docdata properties so they are not added more than once.
-  private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException {
-    int docid = incrNumDocsCreated();
-    Document doc = new Document();
-    doc.add(new Field("docid", "doc"+docid, storeVal, indexVal, termVecVal));
-    if (docData.getName()!=null) {
-      String name = (cnt<0 ? docData.getName() : docData.getName()+"_"+cnt);
-      doc.add(new Field("docname", name, storeVal, indexVal, termVecVal));
-    }
-    if (docData.getDate()!=null) {
-      String dateStr = DateTools.dateToString(docData.getDate(), DateTools.Resolution.SECOND);
-      doc.add(new Field("docdate", dateStr, storeVal, indexVal, termVecVal));
-    }
-    if (docData.getTitle()!=null) {
-      doc.add(new Field("doctitle", docData.getTitle(), storeVal, indexVal, termVecVal));
-    }
-    if (docData.getBody()!=null && docData.getBody().length()>0) {
-      String bdy;
-      if (size<=0 || size>=docData.getBody().length()) {
-        bdy = docData.getBody(); // use all
-        docData.setBody("");  // nothing left
-      } else {
-        // attempt not to break words - if whitespace found within next 20 chars...
-        for (int n=size-1; n<size+20 && n<docData.getBody().length(); n++) {
-          if (Character.isWhitespace(docData.getBody().charAt(n))) {
-            size = n;
-            break;
-          }
-        }
-        bdy = docData.getBody().substring(0,size); // use part
-        docData.setBody(docData.getBody().substring(size)); // some left
-      }
-      doc.add(new Field(BODY_FIELD, bdy, storeVal, indexVal, termVecVal));
-      if (storeBytes == true) {
-        doc.add(new Field("bytes", bdy.getBytes("UTF-8"), Field.Store.YES));
-      }
-    }
-
-    if (docData.getProps()!=null) {
-      for (Iterator it = docData.getProps().keySet().iterator(); it.hasNext(); ) {
-        String key = (String) it.next();
-        String val = (String) docData.getProps().get(key);
-        doc.add(new Field(key, val, storeVal, indexVal, termVecVal));
-      }
-      docData.setProps(null);
-    }
-    //System.out.println("============== Created doc "+numDocsCreated+" :\n"+doc+"\n==========");
-    return doc;
-  }
-
-  /*
-   *  (non-Javadoc)
-   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument(int)
-   */
-  public Document makeDocument(int size) throws Exception {
-    LeftOver lvr = (LeftOver) leftovr.get();
-    if (lvr==null || lvr.docdata==null || lvr.docdata.getBody()==null || lvr.docdata.getBody().length()==0) {
-      resetLeftovers();
-    }
-    DocData dd = (lvr==null ? getNextDocData() : lvr.docdata);
-    int cnt = (lvr==null ? 0 : lvr.cnt);
-    while (dd.getBody()==null || dd.getBody().length()<size) {
-      DocData dd2 = dd;
-      dd = getNextDocData();
-      cnt = 0;
-      dd.setBody(dd2.getBody() + dd.getBody());
-    }
-    Document doc = createDocument(dd,size,cnt);
-    if (dd.getBody()==null || dd.getBody().length()==0) {
-      resetLeftovers();
-    } else {
-      if (lvr == null) {
-        lvr = new LeftOver();
-        leftovr.set(lvr);
-      }
-      lvr.docdata = dd;
-      lvr.cnt = ++cnt;
-    }
-    return doc;
-  }
-
-  private void resetLeftovers() {
-    leftovr.set(null);
-  }
-
-  /* (non-Javadoc)
-   * @see DocMaker#setConfig(java.util.Properties)
-   */
-  public void setConfig(Config config) {
-    this.config = config;
-    boolean stored = config.get("doc.stored",false); 
-    boolean tokenized = config.get("doc.tokenized",true);
-    boolean termVec = config.get("doc.term.vector",false);
-    storeVal = (stored ? Field.Store.YES : Field.Store.NO);
-    indexVal = (tokenized ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED);
-    termVecVal = (termVec ? Field.TermVector.YES : Field.TermVector.NO);
-    storeBytes = config.get("doc.store.body.bytes", false);
-    forever = config.get("doc.maker.forever",true);
-  }
-
-  /*
-   *  (non-Javadoc)
-   * @see DocMaker#resetIinputs()
-   */
-  public synchronized void resetInputs() {
-    printDocStatistics();
-    numBytes = 0;
-    numDocsCreated = 0;
-    resetLeftovers();
-  }
-
-  /*
-   *  (non-Javadoc)
-   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#numUniqueBytes()
-   */
-  public long numUniqueBytes() {
-    return numUniqueBytes;
-  }
-
-  /*
-   *  (non-Javadoc)
-   * @see DocMaker#getCount()
-   */
-  public synchronized int getCount() {
-    return numDocsCreated;
-  }
-
-  /*
-   *  (non-Javadoc)
-   * @see DocMaker#getByteCount()
-   */
-  public synchronized long getByteCount() {
-    return numBytes;
-  }
-
-  protected void addUniqueBytes (long n) {
-    numUniqueBytes += n;
-  }
-  
-  protected synchronized void addBytes (long n) {
-    numBytes += n;
-  }
-
-  /*
-   *  (non-Javadoc)
-   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#printDocStatistics()
-   */
-  private int lastPrintedNumUniqueTexts = 0;
-  private long lastPrintedNumUniqueBytes = 0;
-  private int printNum = 0;
-  private HTMLParser htmlParser;
-  
-  public void printDocStatistics() {
-    boolean print = false;
-    String col = "                  ";
-    StringBuffer sb = new StringBuffer();
-    String newline = System.getProperty("line.separator");
-    sb.append("------------> ").append(Format.simpleName(getClass())).append(" statistics (").append(printNum).append("): ").append(newline);
-    int nut = numUniqueTexts();
-    if (nut > lastPrintedNumUniqueTexts) {
-      print = true;
-      sb.append("total count of unique texts: ").append(Format.format(0,nut,col)).append(newline);
-      lastPrintedNumUniqueTexts = nut;
-    }
-    long nub = numUniqueBytes();
-    if (nub > lastPrintedNumUniqueBytes) {
-      print = true;
-      sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline);
-      lastPrintedNumUniqueBytes = nub;
-    }
-    if (getCount()>0) {
-      print = true;
-      sb.append("num docs added since last inputs reset:   ").append(Format.format(0,getCount(),col)).append(newline);
-      sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getByteCount(),col)).append(newline);
-    }
-    if (print) {
-      System.out.println(sb.append(newline).toString());
-      printNum++;
-    }
-  }
-
-  protected void collectFiles(File f, ArrayList inputFiles) {
-    //System.out.println("Collect: "+f.getAbsolutePath());
-    if (!f.canRead()) {
-      return;
-    }
-    if (f.isDirectory()) {
-      File files[] = f.listFiles();
-      for (int i = 0; i < files.length; i++) {
-        collectFiles(files[i],inputFiles);
-      }
-      return;
-    }
-    inputFiles.add(f);
-    addUniqueBytes(f.length());
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#setHTMLParser(org.apache.lucene.benchmark.byTask.feeds.HTMLParser)
-   */
-  public void setHTMLParser(HTMLParser htmlParser) {
-    this.htmlParser = htmlParser;
-  }
-
-  /*
-   *  (non-Javadoc)
-   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#getHtmlParser()
-   */
-  public HTMLParser getHtmlParser() {
-    return htmlParser;
-  }
-
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.Format;
+import org.apache.lucene.document.DateTools;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+import java.io.File;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Iterator;
+
+
+/**
+ * Create documents for the test.
+ * Maintains counters of chars etc. so that sub-classes just need to 
+ * provide textual content, and the create-by-size is handled here.
+ *
+ * <p/>
+ * Config Params (default is in caps):
+ * doc.stored=true|FALSE<br/>
+ * doc.tokenized=TRUE|false<br/>
+ * doc.term.vector=true|FALSE<br/>
+ * doc.store.body.bytes=true|FALSE //Store the body contents raw UTF-8 bytes as a field<br/>
+ */
+public abstract class BasicDocMaker implements DocMaker {
+  
+  private int numDocsCreated = 0;
+  private boolean storeBytes = false;
+  protected boolean forever;
+
+  private static class LeftOver {
+    private DocData docdata;
+    private int cnt;
+  }
+
+  // leftovers are thread local, because it is unsafe to share residues between threads
+  private ThreadLocal leftovr = new ThreadLocal();
+
+  static final String BODY_FIELD = "body";
+  private long numBytes = 0;
+  private long numUniqueBytes = 0;
+
+  protected Config config;
+
+  protected Field.Store storeVal = Field.Store.NO;
+  protected Field.Index indexVal = Field.Index.TOKENIZED;
+  protected Field.TermVector termVecVal = Field.TermVector.NO;
+  
+  private synchronized int incrNumDocsCreated() {
+    return numDocsCreated++;
+  }
+
+  /**
+   * Return the data of the next document.
+   * All current implementations can create docs forever. 
+   * When the input data is exhausted, input files are iterated.
+   * This re-iteration can be avoided by setting doc.maker.forever to false (default is true).
+   * @return data of the next document.
+   * @exception if cannot create the next doc data
+   * @exception NoMoreDataException if data is exhausted (and 'forever' set to false).
+   */
+  protected abstract DocData getNextDocData() throws NoMoreDataException, Exception;
+
+  /*
+   *  (non-Javadoc)
+   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument()
+   */
+  public Document makeDocument () throws Exception {
+    resetLeftovers();
+    DocData docData = getNextDocData();
+    Document doc = createDocument(docData,0,-1);
+    return doc;
+  }
+
+  // create a doc
+  // use only part of the body, modify it to keep the rest (or use all if size==0).
+  // reset the docdata properties so they are not added more than once.
+  private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException {
+    int docid = incrNumDocsCreated();
+    Document doc = new Document();
+    doc.add(new Field("docid", "doc"+docid, storeVal, indexVal, termVecVal));
+    if (docData.getName()!=null) {
+      String name = (cnt<0 ? docData.getName() : docData.getName()+"_"+cnt);
+      doc.add(new Field("docname", name, storeVal, indexVal, termVecVal));
+    }
+    if (docData.getDate()!=null) {
+      String dateStr = DateTools.dateToString(docData.getDate(), DateTools.Resolution.SECOND);
+      doc.add(new Field("docdate", dateStr, storeVal, indexVal, termVecVal));
+    }
+    if (docData.getTitle()!=null) {
+      doc.add(new Field("doctitle", docData.getTitle(), storeVal, indexVal, termVecVal));
+    }
+    if (docData.getBody()!=null && docData.getBody().length()>0) {
+      String bdy;
+      if (size<=0 || size>=docData.getBody().length()) {
+        bdy = docData.getBody(); // use all
+        docData.setBody("");  // nothing left
+      } else {
+        // attempt not to break words - if whitespace found within next 20 chars...
+        for (int n=size-1; n<size+20 && n<docData.getBody().length(); n++) {
+          if (Character.isWhitespace(docData.getBody().charAt(n))) {
+            size = n;
+            break;
+          }
+        }
+        bdy = docData.getBody().substring(0,size); // use part
+        docData.setBody(docData.getBody().substring(size)); // some left
+      }
+      doc.add(new Field(BODY_FIELD, bdy, storeVal, indexVal, termVecVal));
+      if (storeBytes == true) {
+        doc.add(new Field("bytes", bdy.getBytes("UTF-8"), Field.Store.YES));
+      }
+    }
+
+    if (docData.getProps()!=null) {
+      for (Iterator it = docData.getProps().keySet().iterator(); it.hasNext(); ) {
+        String key = (String) it.next();
+        String val = (String) docData.getProps().get(key);
+        doc.add(new Field(key, val, storeVal, indexVal, termVecVal));
+      }
+      docData.setProps(null);
+    }
+    //System.out.println("============== Created doc "+numDocsCreated+" :\n"+doc+"\n==========");
+    return doc;
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument(int)
+   */
+  public Document makeDocument(int size) throws Exception {
+    LeftOver lvr = (LeftOver) leftovr.get();
+    if (lvr==null || lvr.docdata==null || lvr.docdata.getBody()==null || lvr.docdata.getBody().length()==0) {
+      resetLeftovers();
+    }
+    DocData dd = (lvr==null ? getNextDocData() : lvr.docdata);
+    int cnt = (lvr==null ? 0 : lvr.cnt);
+    while (dd.getBody()==null || dd.getBody().length()<size) {
+      DocData dd2 = dd;
+      dd = getNextDocData();
+      cnt = 0;
+      dd.setBody(dd2.getBody() + dd.getBody());
+    }
+    Document doc = createDocument(dd,size,cnt);
+    if (dd.getBody()==null || dd.getBody().length()==0) {
+      resetLeftovers();
+    } else {
+      if (lvr == null) {
+        lvr = new LeftOver();
+        leftovr.set(lvr);
+      }
+      lvr.docdata = dd;
+      lvr.cnt = ++cnt;
+    }
+    return doc;
+  }
+
+  private void resetLeftovers() {
+    leftovr.set(null);
+  }
+
+  /* (non-Javadoc)
+   * @see DocMaker#setConfig(java.util.Properties)
+   */
+  public void setConfig(Config config) {
+    this.config = config;
+    boolean stored = config.get("doc.stored",false); 
+    boolean tokenized = config.get("doc.tokenized",true);
+    boolean termVec = config.get("doc.term.vector",false);
+    storeVal = (stored ? Field.Store.YES : Field.Store.NO);
+    indexVal = (tokenized ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED);
+    termVecVal = (termVec ? Field.TermVector.YES : Field.TermVector.NO);
+    storeBytes = config.get("doc.store.body.bytes", false);
+    forever = config.get("doc.maker.forever",true);
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see DocMaker#resetIinputs()
+   */
+  public synchronized void resetInputs() {
+    printDocStatistics();
+    numBytes = 0;
+    numDocsCreated = 0;
+    resetLeftovers();
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#numUniqueBytes()
+   */
+  public long numUniqueBytes() {
+    return numUniqueBytes;
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see DocMaker#getCount()
+   */
+  public synchronized int getCount() {
+    return numDocsCreated;
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see DocMaker#getByteCount()
+   */
+  public synchronized long getByteCount() {
+    return numBytes;
+  }
+
+  protected void addUniqueBytes (long n) {
+    numUniqueBytes += n;
+  }
+  
+  protected synchronized void addBytes (long n) {
+    numBytes += n;
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#printDocStatistics()
+   */
+  private int lastPrintedNumUniqueTexts = 0;
+  private long lastPrintedNumUniqueBytes = 0;
+  private int printNum = 0;
+  private HTMLParser htmlParser;
+  
+  public void printDocStatistics() {
+    boolean print = false;
+    String col = "                  ";
+    StringBuffer sb = new StringBuffer();
+    String newline = System.getProperty("line.separator");
+    sb.append("------------> ").append(Format.simpleName(getClass())).append(" statistics (").append(printNum).append("): ").append(newline);
+    int nut = numUniqueTexts();
+    if (nut > lastPrintedNumUniqueTexts) {
+      print = true;
+      sb.append("total count of unique texts: ").append(Format.format(0,nut,col)).append(newline);
+      lastPrintedNumUniqueTexts = nut;
+    }
+    long nub = numUniqueBytes();
+    if (nub > lastPrintedNumUniqueBytes) {
+      print = true;
+      sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline);
+      lastPrintedNumUniqueBytes = nub;
+    }
+    if (getCount()>0) {
+      print = true;
+      sb.append("num docs added since last inputs reset:   ").append(Format.format(0,getCount(),col)).append(newline);
+      sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getByteCount(),col)).append(newline);
+    }
+    if (print) {
+      System.out.println(sb.append(newline).toString());
+      printNum++;
+    }
+  }
+
+  protected void collectFiles(File f, ArrayList inputFiles) {
+    //System.out.println("Collect: "+f.getAbsolutePath());
+    if (!f.canRead()) {
+      return;
+    }
+    if (f.isDirectory()) {
+      File files[] = f.listFiles();
+      for (int i = 0; i < files.length; i++) {
+        collectFiles(files[i],inputFiles);
+      }
+      return;
+    }
+    inputFiles.add(f);
+    addUniqueBytes(f.length());
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#setHTMLParser(org.apache.lucene.benchmark.byTask.feeds.HTMLParser)
+   */
+  public void setHTMLParser(HTMLParser htmlParser) {
+    this.htmlParser = htmlParser;
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#getHtmlParser()
+   */
+  public HTMLParser getHtmlParser() {
+    return htmlParser;
+  }
+
+
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java?view=diff&rev=524969&r1=524968&r2=524969
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java Mon Apr  2 16:37:14 2007
@@ -1,71 +1,71 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.benchmark.byTask.utils.Config;
-
-
-/**
- * Create documents for the test.
- * <br>Each call to makeDocument would create the next document.
- * When input is exhausted, the DocMaker iterates over the input again, 
- * does provifing a source for unlimited number of documents, 
- * though not all of them are unique. 
- */
-public interface DocMaker {
-
-  /** 
-   * Create the next document, of the given size by input bytes.
-   * If the implementation does not support control over size, an exception is thrown.
-   * @param size size of document, or 0 if there is no size requirement.
-   * @exception if cannot make the document, or if size>0 was specified but this feature is not supported.
-   */ 
-  public Document makeDocument (int size) throws Exception;
-
-  /** Create the next document. */
-  public Document makeDocument () throws Exception;
-
-  /** Set the properties */
-  public void setConfig (Config config);
-  
-  /** Reset inputs so that the test run would behave, input wise, as if it just started. */
-  public void resetInputs();
-  
-  /** Return how many real unique texts are available, 0 if not applicable. */ 
-  public int numUniqueTexts();
-  
-  /** Return total bytes of all available unique texts, 0 if not applicable */ 
-  public long numUniqueBytes();
-
-  /** Return number of docs made since last reset. */
-  public int getCount();
-
-  /** Return total byte size of docs made since last reset. */
-  public long getByteCount();
-
-  /** Print some statistics on docs available/added/etc. */ 
-  public void printDocStatistics();
-
-  /** Set the html parser to use, when appropriate */
-  public void setHTMLParser(HTMLParser htmlParser);
-  
-  /** Returns the htmlParser. */
-  public HTMLParser getHtmlParser();
-
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+
+
+/**
+ * Create documents for the test.
+ * <br>Each call to makeDocument would create the next document.
+ * When input is exhausted, the DocMaker iterates over the input again, 
+ * does provifing a source for unlimited number of documents, 
+ * though not all of them are unique. 
+ */
+public interface DocMaker {
+
+  /** 
+   * Create the next document, of the given size by input bytes.
+   * If the implementation does not support control over size, an exception is thrown.
+   * @param size size of document, or 0 if there is no size requirement.
+   * @exception if cannot make the document, or if size>0 was specified but this feature is not supported.
+   */ 
+  public Document makeDocument (int size) throws Exception;
+
+  /** Create the next document. */
+  public Document makeDocument () throws Exception;
+
+  /** Set the properties */
+  public void setConfig (Config config);
+  
+  /** Reset inputs so that the test run would behave, input wise, as if it just started. */
+  public void resetInputs();
+  
+  /** Return how many real unique texts are available, 0 if not applicable. */ 
+  public int numUniqueTexts();
+  
+  /** Return total bytes of all available unique texts, 0 if not applicable */ 
+  public long numUniqueBytes();
+
+  /** Return number of docs made since last reset. */
+  public int getCount();
+
+  /** Return total byte size of docs made since last reset. */
+  public long getByteCount();
+
+  /** Print some statistics on docs available/added/etc. */ 
+  public void printDocStatistics();
+
+  /** Set the html parser to use, when appropriate */
+  public void setHTMLParser(HTMLParser htmlParser);
+  
+  /** Returns the htmlParser. */
+  public HTMLParser getHtmlParser();
+
 }

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java?view=diff&rev=524969&r1=524968&r2=524969
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java Mon Apr  2 16:37:14 2007
@@ -1,49 +1,49 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-import org.apache.lucene.search.Query;
-import org.apache.lucene.benchmark.byTask.utils.Config;
-
-
-/**
- * Create queries for the test.
- */
-public interface QueryMaker {
-
-  /** 
-   * Create the next query, of the given size.
-   * @param size the size of the query - number of terms, etc.
-   * @exception if cannot make the query, or if size>0 was specified but this feature is not supported.
-   */ 
-  public Query makeQuery (int size) throws Exception;
-
-  /** Create the next query */ 
-  public Query makeQuery () throws Exception;
-
-  /** Set the properties 
-   * @throws Exception */
-  public void setConfig (Config config) throws Exception;
-  
-  /** Reset inputs so that the test run would behave, input wise, as if it just started. */
-  public void resetInputs();
-  
-  /** Print the queries */
-  public String printQueries();
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.search.Query;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+
+
+/**
+ * Create queries for the test.
+ */
+public interface QueryMaker {
+
+  /** 
+   * Create the next query, of the given size.
+   * @param size the size of the query - number of terms, etc.
+   * @exception if cannot make the query, or if size>0 was specified but this feature is not supported.
+   */ 
+  public Query makeQuery (int size) throws Exception;
+
+  /** Create the next query */ 
+  public Query makeQuery () throws Exception;
+
+  /** Set the properties 
+   * @throws Exception */
+  public void setConfig (Config config) throws Exception;
+  
+  /** Reset inputs so that the test run would behave, input wise, as if it just started. */
+  public void resetInputs();
+  
+  /** Print the queries */
+  public String printQueries();
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java?view=diff&rev=524969&r1=524968&r2=524969
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java Mon Apr  2 16:37:14 2007
@@ -1,121 +1,121 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.benchmark.byTask.utils.Config;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.text.DateFormat;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.Locale;
-
-
-/**
- * A DocMaker using the Reuters collection for its input.
- *
- * Config properties:
- * docs.dir=&lt;path to the docs dir| Default: reuters-out&gt;
-
- *
- */
-public class ReutersDocMaker extends BasicDocMaker {
-
-  private DateFormat dateFormat;
-  private File dataDir = null;
-  private ArrayList inputFiles = new ArrayList();
-  private int nextFile = 0;
-  private int iteration=0;
-  
-  /* (non-Javadoc)
-   * @see SimpleDocMaker#setConfig(java.util.Properties)
-   */
-  public void setConfig(Config config) {
-    super.setConfig(config);
-    String d = config.get("docs.dir","reuters-out");
-    dataDir = new File(new File("work"),d);
-
-
-    collectFiles(dataDir,inputFiles);
-    if (inputFiles.size()==0) {
-      throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
-    }
-    // date format: 30-MAR-1987 14:22:36.87
-    dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
-    dateFormat.setLenient(true);
-  }
-
-  protected DocData getNextDocData() throws Exception {
-    File f = null;
-    String name = null;
-    synchronized (this) {
-      if (nextFile >= inputFiles.size()) { 
-        // exhausted files, start a new round, unless forever set to false.
-        if (!forever) {
-          throw new NoMoreDataException();
-        }
-        nextFile = 0;
-        iteration++;
-      }
-      f = (File) inputFiles.get(nextFile++);
-      name = f.getCanonicalPath()+"_"+iteration;
-    }
-    
-    BufferedReader reader = new BufferedReader(new FileReader(f));
-    String line = null;
-    //First line is the date, 3rd is the title, rest is body
-    String dateStr = reader.readLine();
-    reader.readLine();//skip an empty line
-    String title = reader.readLine();
-    reader.readLine();//skip an empty line
-    StringBuffer bodyBuf = new StringBuffer(1024);
-    while ((line = reader.readLine()) != null) {
-      bodyBuf.append(line).append(' ');
-    }
-    reader.close();
-    
-    addBytes(f.length());
-
-    
-    Date date = dateFormat.parse(dateStr.trim()); 
-    return new DocData(name, bodyBuf.toString(), title, null, date);
-  }
-
-
-  /*
-   *  (non-Javadoc)
-   * @see DocMaker#resetIinputs()
-   */
-  public synchronized void resetInputs() {
-    super.resetInputs();
-    nextFile = 0;
-    iteration = 0;
-  }
-
-  /*
-   *  (non-Javadoc)
-   * @see DocMaker#numUniqueTexts()
-   */
-  public int numUniqueTexts() {
-    return inputFiles.size();
-  }
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.benchmark.byTask.utils.Config;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.Locale;
+
+
+/**
+ * A DocMaker using the Reuters collection for its input.
+ *
+ * Config properties:
+ * docs.dir=&lt;path to the docs dir| Default: reuters-out&gt;
+
+ *
+ */
+public class ReutersDocMaker extends BasicDocMaker {
+
+  private DateFormat dateFormat;
+  private File dataDir = null;
+  private ArrayList inputFiles = new ArrayList();
+  private int nextFile = 0;
+  private int iteration=0;
+  
+  /* (non-Javadoc)
+   * @see BasicDocMaker#setConfig(Config)
+   */
+  public void setConfig(Config config) {
+    super.setConfig(config);
+    String d = config.get("docs.dir","reuters-out");
+    dataDir = new File(new File("work"),d);
+
+
+    collectFiles(dataDir,inputFiles);
+    if (inputFiles.size()==0) {
+      throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
+    }
+    // date format: 30-MAR-1987 14:22:36.87
+    dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
+    dateFormat.setLenient(true);
+  }
+
+  protected DocData getNextDocData() throws Exception {
+    File f = null;
+    String name = null;
+    synchronized (this) {
+      if (nextFile >= inputFiles.size()) { 
+        // exhausted files, start a new round, unless forever set to false.
+        if (!forever) {
+          throw new NoMoreDataException();
+        }
+        nextFile = 0;
+        iteration++;
+      }
+      f = (File) inputFiles.get(nextFile++);
+      name = f.getCanonicalPath()+"_"+iteration;
+    }
+    
+    BufferedReader reader = new BufferedReader(new FileReader(f));
+    String line = null;
+    //First line is the date, 3rd is the title, rest is body
+    String dateStr = reader.readLine();
+    reader.readLine();//skip an empty line
+    String title = reader.readLine();
+    reader.readLine();//skip an empty line
+    StringBuffer bodyBuf = new StringBuffer(1024);
+    while ((line = reader.readLine()) != null) {
+      bodyBuf.append(line).append(' ');
+    }
+    reader.close();
+    
+    addBytes(f.length());
+
+    
+    Date date = dateFormat.parse(dateStr.trim()); 
+    return new DocData(name, bodyBuf.toString(), title, null, date);
+  }
+
+
+  /*
+   *  (non-Javadoc)
+   * @see DocMaker#resetInputs()
+   */
+  public synchronized void resetInputs() {
+    super.resetInputs();
+    nextFile = 0;
+    iteration = 0;
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see DocMaker#numUniqueTexts()
+   */
+  public int numUniqueTexts() {
+    return inputFiles.size();
+  }
+
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java?view=diff&rev=524969&r1=524968&r2=524969
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java Mon Apr  2 16:37:14 2007
@@ -1,117 +1,117 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.WildcardQuery;
-import org.apache.lucene.search.spans.SpanFirstQuery;
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
-
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.spans.SpanFirstQuery;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-
 
-/**
- * A QueryMaker that makes queries devised manually (by Grant Ingersoll) for
- * searching in the Reuters collection.
- */
+
+/**
+ * A QueryMaker that makes queries devised manually (by Grant Ingersoll) for
+ * searching in the Reuters collection.
+ */
 public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker {
 
-  private static String [] STANDARD_QUERIES = {
-    //Start with some short queries
-    "Salomon", "Comex", "night trading", "Japan Sony",
-    //Try some Phrase Queries
-    "\"Sony Japan\"", "\"food needs\"~3",
-    "\"World Bank\"^2 AND Nigeria", "\"World Bank\" -Nigeria",
-    "\"Ford Credit\"~5",
-    //Try some longer queries
-    "airline Europe Canada destination",
-    "Long term pressure by trade " +
-    "ministers is necessary if the current Uruguay round of talks on " +
-    "the General Agreement on Trade and Tariffs (GATT) is to " +
-    "succeed"
-  };
-  
-  private static Query[] getPrebuiltQueries(String field) {
-    //  be wary of unanalyzed text
-    return new Query[] {
-        new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 5),
-        new SpanNearQuery(new SpanQuery[]{new SpanTermQuery(new Term(field, "night")), new SpanTermQuery(new Term(field, "trading"))}, 4, false),
-        new SpanNearQuery(new SpanQuery[]{new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 10), new SpanTermQuery(new Term(field, "credit"))}, 10, false),
-        new WildcardQuery(new Term(field, "fo*")),
-    };
-  }
-  
-  /**
-   * Parse the strings containing Lucene queries.
-   *
-   * @param qs array of strings containing query expressions
-   * @param a  analyzer to use when parsing queries
-   * @return array of Lucene queries
-   */
-  private static Query[] createQueries(List qs, Analyzer a) {
-    QueryParser qp = new QueryParser("body", a);
-    List queries = new ArrayList();
-    for (int i = 0; i < qs.size(); i++)  {
-      try {
-        
-        Object query = qs.get(i);
-        Query q = null;
-        if (query instanceof String) {
-          q = qp.parse((String) query);
-          
-        } else if (query instanceof Query) {
-          q = (Query) query;
-          
-        } else {
-          System.err.println("Unsupported Query Type: " + query);
-        }
-        
-        if (q != null) {
-          queries.add(q);
-        }
-        
-      } catch (Exception e)  {
-        e.printStackTrace();
-      }
-    }
-    
-    return (Query[]) queries.toArray(new Query[0]);
-  }
-  
+  private static String [] STANDARD_QUERIES = {
+    //Start with some short queries
+    "Salomon", "Comex", "night trading", "Japan Sony",
+    //Try some Phrase Queries
+    "\"Sony Japan\"", "\"food needs\"~3",
+    "\"World Bank\"^2 AND Nigeria", "\"World Bank\" -Nigeria",
+    "\"Ford Credit\"~5",
+    //Try some longer queries
+    "airline Europe Canada destination",
+    "Long term pressure by trade " +
+    "ministers is necessary if the current Uruguay round of talks on " +
+    "the General Agreement on Trade and Tariffs (GATT) is to " +
+    "succeed"
+  };
+  
+  private static Query[] getPrebuiltQueries(String field) {
+    //  be wary of unanalyzed text
+    return new Query[] {
+        new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 5),
+        new SpanNearQuery(new SpanQuery[]{new SpanTermQuery(new Term(field, "night")), new SpanTermQuery(new Term(field, "trading"))}, 4, false),
+        new SpanNearQuery(new SpanQuery[]{new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 10), new SpanTermQuery(new Term(field, "credit"))}, 10, false),
+        new WildcardQuery(new Term(field, "fo*")),
+    };
+  }
+  
+  /**
+   * Parse the strings containing Lucene queries.
+   *
+   * @param qs list of queries: query expression strings to parse, or prebuilt Query objects
+   * @param a  analyzer to use when parsing queries
+   * @return array of Lucene queries
+   */
+  private static Query[] createQueries(List qs, Analyzer a) {
+    QueryParser qp = new QueryParser("body", a);
+    List queries = new ArrayList();
+    for (int i = 0; i < qs.size(); i++)  {
+      try {
+        
+        Object query = qs.get(i);
+        Query q = null;
+        if (query instanceof String) {
+          q = qp.parse((String) query);
+          
+        } else if (query instanceof Query) {
+          q = (Query) query;
+          
+        } else {
+          System.err.println("Unsupported Query Type: " + query);
+        }
+        
+        if (q != null) {
+          queries.add(q);
+        }
+        
+      } catch (Exception e)  {
+        e.printStackTrace();
+      }
+    }
+    
+    return (Query[]) queries.toArray(new Query[0]);
+  }
+  
   protected Query[] prepareQueries() throws Exception {
-    // analyzer (default is standard analyzer)
-    Analyzer anlzr= (Analyzer) Class.forName(config.get("analyzer",
-    "org.apache.lucene.analysis.standard.StandardAnalyzer")).newInstance(); 
-    
-    List queryList = new ArrayList(20);
-    queryList.addAll(Arrays.asList(STANDARD_QUERIES));
-    queryList.addAll(Arrays.asList(getPrebuiltQueries("body")));
+    // analyzer (default is standard analyzer)
+    Analyzer anlzr= (Analyzer) Class.forName(config.get("analyzer",
+    "org.apache.lucene.analysis.standard.StandardAnalyzer")).newInstance(); 
+    
+    List queryList = new ArrayList(20);
+    queryList.addAll(Arrays.asList(STANDARD_QUERIES));
+    queryList.addAll(Arrays.asList(getPrebuiltQueries("body")));
     return createQueries(queryList, anlzr);
-  }
+  }
+
 
+  
 
-  
-
-}
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java?view=diff&rev=524969&r1=524968&r2=524969
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java Mon Apr  2 16:37:14 2007
@@ -1,69 +1,69 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Create documents for the test.
- */
-public class SimpleDocMaker extends BasicDocMaker {
-  
-  private int docID = 0;
-
-  static final String DOC_TEXT = // from a public first aid info at http://firstaid.ie.eu.org 
-    "Well it may be a little dramatic but sometimes it true. " +
-    "If you call the emergency medical services to an incident, " +
-    "your actions have started the chain of survival. " +
-    "You have acted to help someone you may not even know. " +
-    "First aid is helping, first aid is making that call, " +
-    "putting a Band-Aid on a small wound, controlling bleeding in large " +
-    "wounds or providing CPR for a collapsed person whose not breathing " +
-    "and heart has stopped beating. You can help yourself, your loved " +
-    "ones and the stranger whose life may depend on you being in the " +
-    "right place at the right time with the right knowledge.";
-  
-  // return a new docid
-  private synchronized int newdocid() {
-    return docID++;
-  }
-
-  /*
-   *  (non-Javadoc)
-   * @see DocMaker#resetIinputs()
-   */
-  public synchronized void resetInputs() {
-    super.resetInputs();
-    docID = 0;
-  }
-
-  /*
-   *  (non-Javadoc)
-   * @see DocMaker#numUniqueTexts()
-   */
-  public int numUniqueTexts() {
-    return 0; // not applicable
-  }
-
-  protected DocData getNextDocData() throws NoMoreDataException {
-    if (docID>0 && !forever) {
-      throw new NoMoreDataException();
-    }
-    addBytes(DOC_TEXT.length());
-    return new DocData("doc"+newdocid(),DOC_TEXT, null, null, null);
-  }
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Create documents for the test.
+ */
+public class SimpleDocMaker extends BasicDocMaker {
+  
+  private int docID = 0;
+
+  static final String DOC_TEXT = // from a public first aid info at http://firstaid.ie.eu.org 
+    "Well it may be a little dramatic but sometimes it true. " +
+    "If you call the emergency medical services to an incident, " +
+    "your actions have started the chain of survival. " +
+    "You have acted to help someone you may not even know. " +
+    "First aid is helping, first aid is making that call, " +
+    "putting a Band-Aid on a small wound, controlling bleeding in large " +
+    "wounds or providing CPR for a collapsed person whose not breathing " +
+    "and heart has stopped beating. You can help yourself, your loved " +
+    "ones and the stranger whose life may depend on you being in the " +
+    "right place at the right time with the right knowledge.";
+  
+  // return a new docid
+  private synchronized int newdocid() {
+    return docID++;
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see DocMaker#resetInputs()
+   */
+  public synchronized void resetInputs() {
+    super.resetInputs();
+    docID = 0;
+  }
+
+  /*
+   *  (non-Javadoc)
+   * @see DocMaker#numUniqueTexts()
+   */
+  public int numUniqueTexts() {
+    return 0; // not applicable
+  }
+
+  protected DocData getNextDocData() throws NoMoreDataException {
+    if (docID>0 && !forever) {
+      throw new NoMoreDataException();
+    }
+    addBytes(DOC_TEXT.length());
+    return new DocData("doc"+newdocid(),DOC_TEXT, null, null, null);
+  }
+
+}

Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java
------------------------------------------------------------------------------
    svn:eol-style = native