You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by ot...@apache.org on 2007/04/03 01:37:17 UTC
svn commit: r524969 [1/5] - in
/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark:
./ byTask/ byTask/feeds/ byTask/programmatic/ byTask/stats/ byTask/tasks/
byTask/utils/ standard/ stats/ utils/
Author: otis
Date: Mon Apr 2 16:37:14 2007
New Revision: 524969
URL: http://svn.apache.org/viewvc?view=rev&rev=524969
Log:
- find . -name \*java -type f | xargs perl -pi -e 's/(\r\n|\n|\r)/\n/g' \
&& find . -name \*java -type f | xargs svn propset svn:eol-style native
Modified:
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/AbstractBenchmarker.java (props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/BenchmarkOptions.java (props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/Benchmarker.java (props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/Constants.java (props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/Driver.java (props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleSloppyPhraseQueryMaker.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/Sample.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/Points.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/Report.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/TaskStats.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ClearStatsTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseIndexTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseReaderTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteDocTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewRoundTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenReaderTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OptimizeTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepAllTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepSelectByPrefTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameRoundTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefRoundTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReportTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ResetInputsTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemEraseTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemSoftTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SetPropTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WarmTask.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Config.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/FileUtils.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/standard/ReutersQueries.java (props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/standard/StandardBenchmarker.java (props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/standard/StandardOptions.java (props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/stats/MemUsage.java (props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/stats/QueryData.java (props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/stats/TestData.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/stats/TestRunData.java (props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/stats/TimeData.java (contents, props changed)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java (props changed)
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/AbstractBenchmarker.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/BenchmarkOptions.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/Benchmarker.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/Constants.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/Driver.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java?view=diff&rev=524969&r1=524968&r2=524969
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java Mon Apr 2 16:37:14 2007
@@ -1,134 +1,134 @@
-package org.apache.lucene.benchmark.byTask;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
-import java.io.FileReader;
-import java.io.Reader;
-
-import org.apache.lucene.benchmark.byTask.utils.Algorithm;
-import org.apache.lucene.benchmark.byTask.utils.Config;
-
-
-/**
- * Run the benchmark algorithm.
- * <p>Usage: java Benchmark algorithm-file
- * <ol>
- * <li>Read algorithm.</li>
- * <li> Run the algorithm.</li>
- * </ol>
- * Things to be added/fixed in "Benchmarking by tasks":
- * <ol>
- * <li>TODO - report into Excel and/or graphed view.</li>
- * <li>TODO - perf comparison between Lucene releases over the years.</li>
- * <li>TODO - perf report adequate to include in Lucene nightly build site? (so we can easily track performance changes.)</li>
- * <li>TODO - add overall time control for repeated execution (vs. current by-count only).</li>
- * <li>TODO - query maker that is based on index statistics.</li>
- * </ol>
- */
-public class Benchmark {
-
- private PerfRunData runData;
- private Algorithm algorithm;
- private boolean executed;
-
- public Benchmark (Reader algReader) throws Exception {
- // prepare run data
- try {
- runData = new PerfRunData(new Config(algReader));
- } catch (Exception e) {
- e.printStackTrace();
- throw new Exception("Error: cannot init PerfRunData!",e);
- }
-
- // parse algorithm
- try {
- algorithm = new Algorithm(runData);
- } catch (Exception e) {
- throw new Exception("Error: cannot understand algorithm!",e);
- }
- }
-
- public synchronized void execute() throws Exception {
- if (executed) {
- throw new IllegalStateException("Benchmark was already executed");
- }
- executed = true;
- algorithm.execute();
- }
-
- /**
- * Run the benchmark algorithm.
- * @param args benchmark config and algorithm files
- */
- public static void main(String[] args) {
- // verify command line args
- if (args.length < 1) {
- System.err.println("Usage: java Benchmark <algorithm file>");
- System.exit(1);
- }
-
- // verify input files
- File algFile = new File(args[0]);
- if (!algFile.exists() || !algFile.isFile() || !algFile.canRead()) {
- System.err.println("cannot find/read algorithm file: "+algFile.getAbsolutePath());
- System.exit(1);
- }
-
- System.out.println("Running algorithm from: "+algFile.getAbsolutePath());
-
- Benchmark benchmark = null;
- try {
- benchmark = new Benchmark(new FileReader(algFile));
- } catch (Exception e) {
- e.printStackTrace();
- System.exit(1);
- }
-
- System.out.println("------------> algorithm:");
- System.out.println(benchmark.getAlgorithm().toString());
-
- // execute
- try {
- benchmark.execute();
- } catch (Exception e) {
- System.err.println("Error: cannot execute the algorithm! "+e.getMessage());
- e.printStackTrace();
- }
-
- System.out.println("####################");
- System.out.println("### D O N E !!! ###");
- System.out.println("####################");
-
- }
-
- /**
- * @return Returns the algorithm.
- */
- public Algorithm getAlgorithm() {
- return algorithm;
- }
-
- /**
- * @return Returns the runData.
- */
- public PerfRunData getRunData() {
- return runData;
- }
-
-}
+package org.apache.lucene.benchmark.byTask;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.Reader;
+
+import org.apache.lucene.benchmark.byTask.utils.Algorithm;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+
+
+/**
+ * Run the benchmark algorithm.
+ * <p>Usage: java Benchmark algorithm-file
+ * <ol>
+ * <li>Read algorithm.</li>
+ * <li> Run the algorithm.</li>
+ * </ol>
+ * Things to be added/fixed in "Benchmarking by tasks":
+ * <ol>
+ * <li>TODO - report into Excel and/or graphed view.</li>
+ * <li>TODO - perf comparison between Lucene releases over the years.</li>
+ * <li>TODO - perf report adequate to include in Lucene nightly build site? (so we can easily track performance changes.)</li>
+ * <li>TODO - add overall time control for repeated execution (vs. current by-count only).</li>
+ * <li>TODO - query maker that is based on index statistics.</li>
+ * </ol>
+ */
+public class Benchmark {
+
+ private PerfRunData runData;
+ private Algorithm algorithm;
+ private boolean executed;
+
+ public Benchmark (Reader algReader) throws Exception {
+ // prepare run data
+ try {
+ runData = new PerfRunData(new Config(algReader));
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new Exception("Error: cannot init PerfRunData!",e);
+ }
+
+ // parse algorithm
+ try {
+ algorithm = new Algorithm(runData);
+ } catch (Exception e) {
+ throw new Exception("Error: cannot understand algorithm!",e);
+ }
+ }
+
+ public synchronized void execute() throws Exception {
+ if (executed) {
+ throw new IllegalStateException("Benchmark was already executed");
+ }
+ executed = true;
+ algorithm.execute();
+ }
+
+ /**
+ * Run the benchmark algorithm.
+ * @param args benchmark config and algorithm files
+ */
+ public static void main(String[] args) {
+ // verify command line args
+ if (args.length < 1) {
+ System.err.println("Usage: java Benchmark <algorithm file>");
+ System.exit(1);
+ }
+
+ // verify input files
+ File algFile = new File(args[0]);
+ if (!algFile.exists() || !algFile.isFile() || !algFile.canRead()) {
+ System.err.println("cannot find/read algorithm file: "+algFile.getAbsolutePath());
+ System.exit(1);
+ }
+
+ System.out.println("Running algorithm from: "+algFile.getAbsolutePath());
+
+ Benchmark benchmark = null;
+ try {
+ benchmark = new Benchmark(new FileReader(algFile));
+ } catch (Exception e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+
+ System.out.println("------------> algorithm:");
+ System.out.println(benchmark.getAlgorithm().toString());
+
+ // execute
+ try {
+ benchmark.execute();
+ } catch (Exception e) {
+ System.err.println("Error: cannot execute the algorithm! "+e.getMessage());
+ e.printStackTrace();
+ }
+
+ System.out.println("####################");
+ System.out.println("### D O N E !!! ###");
+ System.out.println("####################");
+
+ }
+
+ /**
+ * @return Returns the algorithm.
+ */
+ public Algorithm getAlgorithm() {
+ return algorithm;
+ }
+
+ /**
+ * @return Returns the runData.
+ */
+ public PerfRunData getRunData() {
+ return runData;
+ }
+
+}
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java?view=diff&rev=524969&r1=524968&r2=524969
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java Mon Apr 2 16:37:14 2007
@@ -1,222 +1,222 @@
-package org.apache.lucene.benchmark.byTask;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
+package org.apache.lucene.benchmark.byTask;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
import java.util.HashMap;
import java.util.Iterator;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
-import org.apache.lucene.benchmark.byTask.feeds.HTMLParser;
-import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
-import org.apache.lucene.benchmark.byTask.stats.Points;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
+import org.apache.lucene.benchmark.byTask.feeds.HTMLParser;
+import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
+import org.apache.lucene.benchmark.byTask.stats.Points;
import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.benchmark.byTask.utils.FileUtils;
-
-
-/**
- * Data maintained by a performance test run.
- * <p>
- * Data includes:
- * <ul>
- * <li>Configuration.
- * <li>Directory, Writer, Reader.
- * <li>Docmaker and a few instances of QueryMaker.
- * <li>Analyzer.
- * <li>Statistics data which updated during the run.
- * </ul>
- */
-public class PerfRunData {
-
- private Points points;
-
- // objects used during performance test run
- // directory, analyzer, docMaker - created at startup.
- // reader, writer, searcher - maintained by basic tasks.
- private Directory directory;
- private Analyzer analyzer;
- private DocMaker docMaker;
- private HTMLParser htmlParser;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.FileUtils;
+
+
+/**
+ * Data maintained by a performance test run.
+ * <p>
+ * Data includes:
+ * <ul>
+ * <li>Configuration.
+ * <li>Directory, Writer, Reader.
+ * <li>Docmaker and a few instances of QueryMaker.
+ * <li>Analyzer.
+ * <li>Statistics data which updated during the run.
+ * </ul>
+ */
+public class PerfRunData {
+
+ private Points points;
+
+ // objects used during performance test run
+ // directory, analyzer, docMaker - created at startup.
+ // reader, writer, searcher - maintained by basic tasks.
+ private Directory directory;
+ private Analyzer analyzer;
+ private DocMaker docMaker;
+ private HTMLParser htmlParser;
// we use separate (identical) instances for each "read" task type, so each can iterate the quries separately.
private HashMap readTaskQueryMaker;
private Class qmkrClass;
-
- private IndexReader indexReader;
- private IndexWriter indexWriter;
- private Config config;
-
- // constructor
- public PerfRunData (Config config) throws Exception {
- this.config = config;
- // analyzer (default is standard analyzer)
- analyzer = (Analyzer) Class.forName(config.get("analyzer",
- "org.apache.lucene.analysis.standard.StandardAnalyzer")).newInstance();
- // doc maker
- docMaker = (DocMaker) Class.forName(config.get("doc.maker",
- "org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker")).newInstance();
- docMaker.setConfig(config);
- // query makers
+
+ private IndexReader indexReader;
+ private IndexWriter indexWriter;
+ private Config config;
+
+ // constructor
+ public PerfRunData (Config config) throws Exception {
+ this.config = config;
+ // analyzer (default is standard analyzer)
+ analyzer = (Analyzer) Class.forName(config.get("analyzer",
+ "org.apache.lucene.analysis.standard.StandardAnalyzer")).newInstance();
+ // doc maker
+ docMaker = (DocMaker) Class.forName(config.get("doc.maker",
+ "org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker")).newInstance();
+ docMaker.setConfig(config);
+ // query makers
readTaskQueryMaker = new HashMap();
- qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker"));
- // html parser, used for some doc makers
- htmlParser = (HTMLParser) Class.forName(config.get("html.parser","org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser")).newInstance();
+ qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker"));
+ // html parser, used for some doc makers
+ htmlParser = (HTMLParser) Class.forName(config.get("html.parser","org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser")).newInstance();
docMaker.setHTMLParser(htmlParser);
- // index stuff
- reinit(false);
-
- // statistic points
- points = new Points(config);
-
- if (Boolean.valueOf(config.get("log.queries","false")).booleanValue()) {
- System.out.println("------------> queries:");
+ // index stuff
+ reinit(false);
+
+ // statistic points
+ points = new Points(config);
+
+ if (Boolean.valueOf(config.get("log.queries","false")).booleanValue()) {
+ System.out.println("------------> queries:");
System.out.println(getQueryMaker(new SearchTask(this)).printQueries());
- }
-
- }
-
- // clean old stuff, reopen
- public void reinit(boolean eraseIndex) throws Exception {
-
- // cleanup index
- if (indexWriter!=null) {
- indexWriter.close();
- indexWriter = null;
- }
- if (indexReader!=null) {
- indexReader.close();
- indexReader = null;
- }
- if (directory!=null) {
- directory.close();
- }
-
- // directory (default is ram-dir).
- if ("FSDirectory".equals(config.get("directory","RAMDirectory"))) {
- File workDir = new File("work");
- File indexDir = new File(workDir,"index");
- if (eraseIndex && indexDir.exists()) {
- FileUtils.fullyDelete(indexDir);
- }
- indexDir.mkdirs();
+ }
+
+ }
+
+ // clean old stuff, reopen
+ public void reinit(boolean eraseIndex) throws Exception {
+
+ // cleanup index
+ if (indexWriter!=null) {
+ indexWriter.close();
+ indexWriter = null;
+ }
+ if (indexReader!=null) {
+ indexReader.close();
+ indexReader = null;
+ }
+ if (directory!=null) {
+ directory.close();
+ }
+
+ // directory (default is ram-dir).
+ if ("FSDirectory".equals(config.get("directory","RAMDirectory"))) {
+ File workDir = new File("work");
+ File indexDir = new File(workDir,"index");
+ if (eraseIndex && indexDir.exists()) {
+ FileUtils.fullyDelete(indexDir);
+ }
+ indexDir.mkdirs();
directory = FSDirectory.getDirectory(indexDir);
- } else {
- directory = new RAMDirectory();
- }
-
- // inputs
- resetInputs();
-
- // release unused stuff
- System.runFinalization();
- System.gc();
- }
-
- /**
- * @return Returns the points.
- */
- public Points getPoints() {
- return points;
- }
-
- /**
- * @return Returns the directory.
- */
- public Directory getDirectory() {
- return directory;
- }
-
- /**
- * @param directory The directory to set.
- */
- public void setDirectory(Directory directory) {
- this.directory = directory;
- }
-
- /**
- * @return Returns the indexReader.
- */
- public IndexReader getIndexReader() {
- return indexReader;
- }
-
- /**
- * @param indexReader The indexReader to set.
- */
- public void setIndexReader(IndexReader indexReader) {
- this.indexReader = indexReader;
- }
-
- /**
- * @return Returns the indexWriter.
- */
- public IndexWriter getIndexWriter() {
- return indexWriter;
- }
-
- /**
- * @param indexWriter The indexWriter to set.
- */
- public void setIndexWriter(IndexWriter indexWriter) {
- this.indexWriter = indexWriter;
- }
-
- /**
- * @return Returns the anlyzer.
- */
- public Analyzer getAnalyzer() {
- return analyzer;
- }
-
- /**
- * @return Returns the docMaker.
- */
- public DocMaker getDocMaker() {
- return docMaker;
- }
-
- /**
- * @return Returns the config.
- */
- public Config getConfig() {
- return config;
- }
-
- public void resetInputs() {
- docMaker.resetInputs();
+ } else {
+ directory = new RAMDirectory();
+ }
+
+ // inputs
+ resetInputs();
+
+ // release unused stuff
+ System.runFinalization();
+ System.gc();
+ }
+
+ /**
+ * @return Returns the points.
+ */
+ public Points getPoints() {
+ return points;
+ }
+
+ /**
+ * @return Returns the directory.
+ */
+ public Directory getDirectory() {
+ return directory;
+ }
+
+ /**
+ * @param directory The directory to set.
+ */
+ public void setDirectory(Directory directory) {
+ this.directory = directory;
+ }
+
+ /**
+ * @return Returns the indexReader.
+ */
+ public IndexReader getIndexReader() {
+ return indexReader;
+ }
+
+ /**
+ * @param indexReader The indexReader to set.
+ */
+ public void setIndexReader(IndexReader indexReader) {
+ this.indexReader = indexReader;
+ }
+
+ /**
+ * @return Returns the indexWriter.
+ */
+ public IndexWriter getIndexWriter() {
+ return indexWriter;
+ }
+
+ /**
+ * @param indexWriter The indexWriter to set.
+ */
+ public void setIndexWriter(IndexWriter indexWriter) {
+ this.indexWriter = indexWriter;
+ }
+
+ /**
+ * @return Returns the anlyzer.
+ */
+ public Analyzer getAnalyzer() {
+ return analyzer;
+ }
+
+ /**
+ * @return Returns the docMaker.
+ */
+ public DocMaker getDocMaker() {
+ return docMaker;
+ }
+
+ /**
+ * @return Returns the config.
+ */
+ public Config getConfig() {
+ return config;
+ }
+
+ public void resetInputs() {
+ docMaker.resetInputs();
Iterator it = readTaskQueryMaker.values().iterator();
while (it.hasNext()) {
((QueryMaker) it.next()).resetInputs();
}
- }
-
- /**
+ }
+
+ /**
* @return Returns the queryMaker by read task type (class)
- */
+ */
public QueryMaker getQueryMaker(ReadTask readTask) {
// mapping the query maker by task class allows extending/adding new search/read tasks
// without needing to modify this class.
@@ -232,13 +232,13 @@
readTaskQueryMaker.put(readTaskClass,qm);
}
return qm;
- }
-
- /**
- * @return Returns the htmlParser.
- */
- public HTMLParser getHtmlParser() {
- return htmlParser;
- }
-
-}
+ }
+
+ /**
+ * @return Returns the htmlParser.
+ */
+ public HTMLParser getHtmlParser() {
+ return htmlParser;
+ }
+
+}
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java?view=diff&rev=524969&r1=524968&r2=524969
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java Mon Apr 2 16:37:14 2007
@@ -1,309 +1,309 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.benchmark.byTask.utils.Format;
-import org.apache.lucene.document.DateTools;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
-import java.io.File;
-import java.io.UnsupportedEncodingException;
-import java.util.ArrayList;
-import java.util.Iterator;
-
-
-/**
- * Create documents for the test.
- * Maintains counters of chars etc. so that sub-classes just need to
- * provide textual content, and the create-by-size is handled here.
- *
- * <p/>
- * Config Params (default is in caps):
- * doc.stored=true|FALSE<br/>
- * doc.tokenized=TRUE|false<br/>
- * doc.term.vector=true|FALSE<br/>
- * doc.store.body.bytes=true|FALSE //Store the body contents raw UTF-8 bytes as a field<br/>
- */
-public abstract class BasicDocMaker implements DocMaker {
-
- private int numDocsCreated = 0;
- private boolean storeBytes = false;
- protected boolean forever;
-
- private static class LeftOver {
- private DocData docdata;
- private int cnt;
- }
-
- // leftovers are thread local, because it is unsafe to share residues between threads
- private ThreadLocal leftovr = new ThreadLocal();
-
- static final String BODY_FIELD = "body";
- private long numBytes = 0;
- private long numUniqueBytes = 0;
-
- protected Config config;
-
- protected Field.Store storeVal = Field.Store.NO;
- protected Field.Index indexVal = Field.Index.TOKENIZED;
- protected Field.TermVector termVecVal = Field.TermVector.NO;
-
- private synchronized int incrNumDocsCreated() {
- return numDocsCreated++;
- }
-
- /**
- * Return the data of the next document.
- * All current implementations can create docs forever.
- * When the input data is exhausted, input files are iterated.
- * This re-iteration can be avoided by setting doc.maker.forever to false (default is true).
- * @return data of the next document.
- * @exception if cannot create the next doc data
- * @exception NoMoreDataException if data is exhausted (and 'forever' set to false).
- */
- protected abstract DocData getNextDocData() throws NoMoreDataException, Exception;
-
- /*
- * (non-Javadoc)
- * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument()
- */
- public Document makeDocument () throws Exception {
- resetLeftovers();
- DocData docData = getNextDocData();
- Document doc = createDocument(docData,0,-1);
- return doc;
- }
-
- // create a doc
- // use only part of the body, modify it to keep the rest (or use all if size==0).
- // reset the docdata properties so they are not added more than once.
- private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException {
- int docid = incrNumDocsCreated();
- Document doc = new Document();
- doc.add(new Field("docid", "doc"+docid, storeVal, indexVal, termVecVal));
- if (docData.getName()!=null) {
- String name = (cnt<0 ? docData.getName() : docData.getName()+"_"+cnt);
- doc.add(new Field("docname", name, storeVal, indexVal, termVecVal));
- }
- if (docData.getDate()!=null) {
- String dateStr = DateTools.dateToString(docData.getDate(), DateTools.Resolution.SECOND);
- doc.add(new Field("docdate", dateStr, storeVal, indexVal, termVecVal));
- }
- if (docData.getTitle()!=null) {
- doc.add(new Field("doctitle", docData.getTitle(), storeVal, indexVal, termVecVal));
- }
- if (docData.getBody()!=null && docData.getBody().length()>0) {
- String bdy;
- if (size<=0 || size>=docData.getBody().length()) {
- bdy = docData.getBody(); // use all
- docData.setBody(""); // nothing left
- } else {
- // attempt not to break words - if whitespace found within next 20 chars...
- for (int n=size-1; n<size+20 && n<docData.getBody().length(); n++) {
- if (Character.isWhitespace(docData.getBody().charAt(n))) {
- size = n;
- break;
- }
- }
- bdy = docData.getBody().substring(0,size); // use part
- docData.setBody(docData.getBody().substring(size)); // some left
- }
- doc.add(new Field(BODY_FIELD, bdy, storeVal, indexVal, termVecVal));
- if (storeBytes == true) {
- doc.add(new Field("bytes", bdy.getBytes("UTF-8"), Field.Store.YES));
- }
- }
-
- if (docData.getProps()!=null) {
- for (Iterator it = docData.getProps().keySet().iterator(); it.hasNext(); ) {
- String key = (String) it.next();
- String val = (String) docData.getProps().get(key);
- doc.add(new Field(key, val, storeVal, indexVal, termVecVal));
- }
- docData.setProps(null);
- }
- //System.out.println("============== Created doc "+numDocsCreated+" :\n"+doc+"\n==========");
- return doc;
- }
-
- /*
- * (non-Javadoc)
- * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument(int)
- */
- public Document makeDocument(int size) throws Exception {
- LeftOver lvr = (LeftOver) leftovr.get();
- if (lvr==null || lvr.docdata==null || lvr.docdata.getBody()==null || lvr.docdata.getBody().length()==0) {
- resetLeftovers();
- }
- DocData dd = (lvr==null ? getNextDocData() : lvr.docdata);
- int cnt = (lvr==null ? 0 : lvr.cnt);
- while (dd.getBody()==null || dd.getBody().length()<size) {
- DocData dd2 = dd;
- dd = getNextDocData();
- cnt = 0;
- dd.setBody(dd2.getBody() + dd.getBody());
- }
- Document doc = createDocument(dd,size,cnt);
- if (dd.getBody()==null || dd.getBody().length()==0) {
- resetLeftovers();
- } else {
- if (lvr == null) {
- lvr = new LeftOver();
- leftovr.set(lvr);
- }
- lvr.docdata = dd;
- lvr.cnt = ++cnt;
- }
- return doc;
- }
-
- private void resetLeftovers() {
- leftovr.set(null);
- }
-
- /* (non-Javadoc)
- * @see DocMaker#setConfig(java.util.Properties)
- */
- public void setConfig(Config config) {
- this.config = config;
- boolean stored = config.get("doc.stored",false);
- boolean tokenized = config.get("doc.tokenized",true);
- boolean termVec = config.get("doc.term.vector",false);
- storeVal = (stored ? Field.Store.YES : Field.Store.NO);
- indexVal = (tokenized ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED);
- termVecVal = (termVec ? Field.TermVector.YES : Field.TermVector.NO);
- storeBytes = config.get("doc.store.body.bytes", false);
- forever = config.get("doc.maker.forever",true);
- }
-
- /*
- * (non-Javadoc)
- * @see DocMaker#resetIinputs()
- */
- public synchronized void resetInputs() {
- printDocStatistics();
- numBytes = 0;
- numDocsCreated = 0;
- resetLeftovers();
- }
-
- /*
- * (non-Javadoc)
- * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#numUniqueBytes()
- */
- public long numUniqueBytes() {
- return numUniqueBytes;
- }
-
- /*
- * (non-Javadoc)
- * @see DocMaker#getCount()
- */
- public synchronized int getCount() {
- return numDocsCreated;
- }
-
- /*
- * (non-Javadoc)
- * @see DocMaker#getByteCount()
- */
- public synchronized long getByteCount() {
- return numBytes;
- }
-
- protected void addUniqueBytes (long n) {
- numUniqueBytes += n;
- }
-
- protected synchronized void addBytes (long n) {
- numBytes += n;
- }
-
- /*
- * (non-Javadoc)
- * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#printDocStatistics()
- */
- private int lastPrintedNumUniqueTexts = 0;
- private long lastPrintedNumUniqueBytes = 0;
- private int printNum = 0;
- private HTMLParser htmlParser;
-
- public void printDocStatistics() {
- boolean print = false;
- String col = " ";
- StringBuffer sb = new StringBuffer();
- String newline = System.getProperty("line.separator");
- sb.append("------------> ").append(Format.simpleName(getClass())).append(" statistics (").append(printNum).append("): ").append(newline);
- int nut = numUniqueTexts();
- if (nut > lastPrintedNumUniqueTexts) {
- print = true;
- sb.append("total count of unique texts: ").append(Format.format(0,nut,col)).append(newline);
- lastPrintedNumUniqueTexts = nut;
- }
- long nub = numUniqueBytes();
- if (nub > lastPrintedNumUniqueBytes) {
- print = true;
- sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline);
- lastPrintedNumUniqueBytes = nub;
- }
- if (getCount()>0) {
- print = true;
- sb.append("num docs added since last inputs reset: ").append(Format.format(0,getCount(),col)).append(newline);
- sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getByteCount(),col)).append(newline);
- }
- if (print) {
- System.out.println(sb.append(newline).toString());
- printNum++;
- }
- }
-
- protected void collectFiles(File f, ArrayList inputFiles) {
- //System.out.println("Collect: "+f.getAbsolutePath());
- if (!f.canRead()) {
- return;
- }
- if (f.isDirectory()) {
- File files[] = f.listFiles();
- for (int i = 0; i < files.length; i++) {
- collectFiles(files[i],inputFiles);
- }
- return;
- }
- inputFiles.add(f);
- addUniqueBytes(f.length());
- }
-
- /* (non-Javadoc)
- * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#setHTMLParser(org.apache.lucene.benchmark.byTask.feeds.HTMLParser)
- */
- public void setHTMLParser(HTMLParser htmlParser) {
- this.htmlParser = htmlParser;
- }
-
- /*
- * (non-Javadoc)
- * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#getHtmlParser()
- */
- public HTMLParser getHtmlParser() {
- return htmlParser;
- }
-
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.Format;
+import org.apache.lucene.document.DateTools;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+import java.io.File;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Iterator;
+
+
+/**
+ * Create documents for the test.
+ * Maintains counters of chars etc. so that sub-classes just need to
+ * provide textual content, and the create-by-size is handled here.
+ *
+ * <p/>
+ * Config Params (default is in caps):
+ * doc.stored=true|FALSE<br/>
+ * doc.tokenized=TRUE|false<br/>
+ * doc.term.vector=true|FALSE<br/>
+ * doc.store.body.bytes=true|FALSE //Store the body contents raw UTF-8 bytes as a field<br/>
+ */
+public abstract class BasicDocMaker implements DocMaker {
+
+ private int numDocsCreated = 0;
+ private boolean storeBytes = false;
+ protected boolean forever;
+
+ private static class LeftOver {
+ private DocData docdata;
+ private int cnt;
+ }
+
+ // leftovers are thread local, because it is unsafe to share residues between threads
+ private ThreadLocal leftovr = new ThreadLocal();
+
+ static final String BODY_FIELD = "body";
+ private long numBytes = 0;
+ private long numUniqueBytes = 0;
+
+ protected Config config;
+
+ protected Field.Store storeVal = Field.Store.NO;
+ protected Field.Index indexVal = Field.Index.TOKENIZED;
+ protected Field.TermVector termVecVal = Field.TermVector.NO;
+
+ private synchronized int incrNumDocsCreated() {
+ return numDocsCreated++;
+ }
+
+ /**
+ * Return the data of the next document.
+ * All current implementations can create docs forever.
+ * When the input data is exhausted, input files are iterated.
+ * This re-iteration can be avoided by setting doc.maker.forever to false (default is true).
+ * @return data of the next document.
+ * @exception Exception if cannot create the next doc data
+ * @exception NoMoreDataException if data is exhausted (and 'forever' set to false).
+ */
+ protected abstract DocData getNextDocData() throws NoMoreDataException, Exception;
+
+ /*
+ * (non-Javadoc)
+ * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument()
+ */
+ public Document makeDocument () throws Exception {
+ resetLeftovers();
+ DocData docData = getNextDocData();
+ Document doc = createDocument(docData,0,-1);
+ return doc;
+ }
+
+ // create a doc
+ // use only part of the body, modify it to keep the rest (or use all if size==0).
+ // reset the docdata properties so they are not added more than once.
+ private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException {
+ int docid = incrNumDocsCreated();
+ Document doc = new Document();
+ doc.add(new Field("docid", "doc"+docid, storeVal, indexVal, termVecVal));
+ if (docData.getName()!=null) {
+ String name = (cnt<0 ? docData.getName() : docData.getName()+"_"+cnt);
+ doc.add(new Field("docname", name, storeVal, indexVal, termVecVal));
+ }
+ if (docData.getDate()!=null) {
+ String dateStr = DateTools.dateToString(docData.getDate(), DateTools.Resolution.SECOND);
+ doc.add(new Field("docdate", dateStr, storeVal, indexVal, termVecVal));
+ }
+ if (docData.getTitle()!=null) {
+ doc.add(new Field("doctitle", docData.getTitle(), storeVal, indexVal, termVecVal));
+ }
+ if (docData.getBody()!=null && docData.getBody().length()>0) {
+ String bdy;
+ if (size<=0 || size>=docData.getBody().length()) {
+ bdy = docData.getBody(); // use all
+ docData.setBody(""); // nothing left
+ } else {
+ // attempt not to break words - if whitespace found within next 20 chars...
+ for (int n=size-1; n<size+20 && n<docData.getBody().length(); n++) {
+ if (Character.isWhitespace(docData.getBody().charAt(n))) {
+ size = n;
+ break;
+ }
+ }
+ bdy = docData.getBody().substring(0,size); // use part
+ docData.setBody(docData.getBody().substring(size)); // some left
+ }
+ doc.add(new Field(BODY_FIELD, bdy, storeVal, indexVal, termVecVal));
+ if (storeBytes == true) {
+ doc.add(new Field("bytes", bdy.getBytes("UTF-8"), Field.Store.YES));
+ }
+ }
+
+ if (docData.getProps()!=null) {
+ for (Iterator it = docData.getProps().keySet().iterator(); it.hasNext(); ) {
+ String key = (String) it.next();
+ String val = (String) docData.getProps().get(key);
+ doc.add(new Field(key, val, storeVal, indexVal, termVecVal));
+ }
+ docData.setProps(null);
+ }
+ //System.out.println("============== Created doc "+numDocsCreated+" :\n"+doc+"\n==========");
+ return doc;
+ }
+
+ /*
+ * (non-Javadoc)
+ * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument(int)
+ */
+ public Document makeDocument(int size) throws Exception {
+ LeftOver lvr = (LeftOver) leftovr.get();
+ if (lvr==null || lvr.docdata==null || lvr.docdata.getBody()==null || lvr.docdata.getBody().length()==0) {
+ resetLeftovers();
+ }
+ DocData dd = (lvr==null ? getNextDocData() : lvr.docdata);
+ int cnt = (lvr==null ? 0 : lvr.cnt);
+ while (dd.getBody()==null || dd.getBody().length()<size) {
+ DocData dd2 = dd;
+ dd = getNextDocData();
+ cnt = 0;
+ dd.setBody(dd2.getBody() + dd.getBody());
+ }
+ Document doc = createDocument(dd,size,cnt);
+ if (dd.getBody()==null || dd.getBody().length()==0) {
+ resetLeftovers();
+ } else {
+ if (lvr == null) {
+ lvr = new LeftOver();
+ leftovr.set(lvr);
+ }
+ lvr.docdata = dd;
+ lvr.cnt = ++cnt;
+ }
+ return doc;
+ }
+
+ private void resetLeftovers() {
+ leftovr.set(null);
+ }
+
+ /* (non-Javadoc)
+ * @see DocMaker#setConfig(Config)
+ */
+ public void setConfig(Config config) {
+ this.config = config;
+ boolean stored = config.get("doc.stored",false);
+ boolean tokenized = config.get("doc.tokenized",true);
+ boolean termVec = config.get("doc.term.vector",false);
+ storeVal = (stored ? Field.Store.YES : Field.Store.NO);
+ indexVal = (tokenized ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED);
+ termVecVal = (termVec ? Field.TermVector.YES : Field.TermVector.NO);
+ storeBytes = config.get("doc.store.body.bytes", false);
+ forever = config.get("doc.maker.forever",true);
+ }
+
+ /*
+ * (non-Javadoc)
+ * @see DocMaker#resetInputs()
+ */
+ public synchronized void resetInputs() {
+ printDocStatistics();
+ numBytes = 0;
+ numDocsCreated = 0;
+ resetLeftovers();
+ }
+
+ /*
+ * (non-Javadoc)
+ * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#numUniqueBytes()
+ */
+ public long numUniqueBytes() {
+ return numUniqueBytes;
+ }
+
+ /*
+ * (non-Javadoc)
+ * @see DocMaker#getCount()
+ */
+ public synchronized int getCount() {
+ return numDocsCreated;
+ }
+
+ /*
+ * (non-Javadoc)
+ * @see DocMaker#getByteCount()
+ */
+ public synchronized long getByteCount() {
+ return numBytes;
+ }
+
+ protected void addUniqueBytes (long n) {
+ numUniqueBytes += n;
+ }
+
+ protected synchronized void addBytes (long n) {
+ numBytes += n;
+ }
+
+ /*
+ * (non-Javadoc)
+ * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#printDocStatistics()
+ */
+ private int lastPrintedNumUniqueTexts = 0;
+ private long lastPrintedNumUniqueBytes = 0;
+ private int printNum = 0;
+ private HTMLParser htmlParser;
+
+ public void printDocStatistics() {
+ boolean print = false;
+ String col = " ";
+ StringBuffer sb = new StringBuffer();
+ String newline = System.getProperty("line.separator");
+ sb.append("------------> ").append(Format.simpleName(getClass())).append(" statistics (").append(printNum).append("): ").append(newline);
+ int nut = numUniqueTexts();
+ if (nut > lastPrintedNumUniqueTexts) {
+ print = true;
+ sb.append("total count of unique texts: ").append(Format.format(0,nut,col)).append(newline);
+ lastPrintedNumUniqueTexts = nut;
+ }
+ long nub = numUniqueBytes();
+ if (nub > lastPrintedNumUniqueBytes) {
+ print = true;
+ sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline);
+ lastPrintedNumUniqueBytes = nub;
+ }
+ if (getCount()>0) {
+ print = true;
+ sb.append("num docs added since last inputs reset: ").append(Format.format(0,getCount(),col)).append(newline);
+ sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getByteCount(),col)).append(newline);
+ }
+ if (print) {
+ System.out.println(sb.append(newline).toString());
+ printNum++;
+ }
+ }
+
+ protected void collectFiles(File f, ArrayList inputFiles) {
+ //System.out.println("Collect: "+f.getAbsolutePath());
+ if (!f.canRead()) {
+ return;
+ }
+ if (f.isDirectory()) {
+ File files[] = f.listFiles();
+ for (int i = 0; i < files.length; i++) {
+ collectFiles(files[i],inputFiles);
+ }
+ return;
+ }
+ inputFiles.add(f);
+ addUniqueBytes(f.length());
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#setHTMLParser(org.apache.lucene.benchmark.byTask.feeds.HTMLParser)
+ */
+ public void setHTMLParser(HTMLParser htmlParser) {
+ this.htmlParser = htmlParser;
+ }
+
+ /*
+ * (non-Javadoc)
+ * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#getHtmlParser()
+ */
+ public HTMLParser getHtmlParser() {
+ return htmlParser;
+ }
+
+
+}
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java?view=diff&rev=524969&r1=524968&r2=524969
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java Mon Apr 2 16:37:14 2007
@@ -1,71 +1,71 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.benchmark.byTask.utils.Config;
-
-
-/**
- * Create documents for the test.
- * <br>Each call to makeDocument would create the next document.
- * When input is exhausted, the DocMaker iterates over the input again,
- * does provifing a source for unlimited number of documents,
- * though not all of them are unique.
- */
-public interface DocMaker {
-
- /**
- * Create the next document, of the given size by input bytes.
- * If the implementation does not support control over size, an exception is thrown.
- * @param size size of document, or 0 if there is no size requirement.
- * @exception if cannot make the document, or if size>0 was specified but this feature is not supported.
- */
- public Document makeDocument (int size) throws Exception;
-
- /** Create the next document. */
- public Document makeDocument () throws Exception;
-
- /** Set the properties */
- public void setConfig (Config config);
-
- /** Reset inputs so that the test run would behave, input wise, as if it just started. */
- public void resetInputs();
-
- /** Return how many real unique texts are available, 0 if not applicable. */
- public int numUniqueTexts();
-
- /** Return total bytes of all available unique texts, 0 if not applicable */
- public long numUniqueBytes();
-
- /** Return number of docs made since last reset. */
- public int getCount();
-
- /** Return total byte size of docs made since last reset. */
- public long getByteCount();
-
- /** Print some statistics on docs available/added/etc. */
- public void printDocStatistics();
-
- /** Set the html parser to use, when appropriate */
- public void setHTMLParser(HTMLParser htmlParser);
-
- /** Returns the htmlParser. */
- public HTMLParser getHtmlParser();
-
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+
+
+/**
+ * Create documents for the test.
+ * <br>Each call to makeDocument would create the next document.
+ * When input is exhausted, the DocMaker iterates over the input again,
+ * thus providing a source for an unlimited number of documents,
+ * though not all of them are unique.
+ */
+public interface DocMaker {
+
+ /**
+ * Create the next document, of the given size by input bytes.
+ * If the implementation does not support control over size, an exception is thrown.
+ * @param size size of document, or 0 if there is no size requirement.
+ * @exception Exception if cannot make the document, or if size>0 was specified but this feature is not supported.
+ */
+ public Document makeDocument (int size) throws Exception;
+
+ /** Create the next document. */
+ public Document makeDocument () throws Exception;
+
+ /** Set the properties */
+ public void setConfig (Config config);
+
+ /** Reset inputs so that the test run would behave, input wise, as if it just started. */
+ public void resetInputs();
+
+ /** Return how many real unique texts are available, 0 if not applicable. */
+ public int numUniqueTexts();
+
+ /** Return total bytes of all available unique texts, 0 if not applicable */
+ public long numUniqueBytes();
+
+ /** Return number of docs made since last reset. */
+ public int getCount();
+
+ /** Return total byte size of docs made since last reset. */
+ public long getByteCount();
+
+ /** Print some statistics on docs available/added/etc. */
+ public void printDocStatistics();
+
+ /** Set the html parser to use, when appropriate */
+ public void setHTMLParser(HTMLParser htmlParser);
+
+ /** Returns the htmlParser. */
+ public HTMLParser getHtmlParser();
+
}
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java?view=diff&rev=524969&r1=524968&r2=524969
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java Mon Apr 2 16:37:14 2007
@@ -1,49 +1,49 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-import org.apache.lucene.search.Query;
-import org.apache.lucene.benchmark.byTask.utils.Config;
-
-
-/**
- * Create queries for the test.
- */
-public interface QueryMaker {
-
- /**
- * Create the next query, of the given size.
- * @param size the size of the query - number of terms, etc.
- * @exception if cannot make the query, or if size>0 was specified but this feature is not supported.
- */
- public Query makeQuery (int size) throws Exception;
-
- /** Create the next query */
- public Query makeQuery () throws Exception;
-
- /** Set the properties
- * @throws Exception */
- public void setConfig (Config config) throws Exception;
-
- /** Reset inputs so that the test run would behave, input wise, as if it just started. */
- public void resetInputs();
-
- /** Print the queries */
- public String printQueries();
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.search.Query;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+
+
+/**
+ * Create queries for the test.
+ */
+public interface QueryMaker {
+
+ /**
+ * Create the next query, of the given size.
+ * @param size the size of the query - number of terms, etc.
+ * @exception Exception if cannot make the query, or if size>0 was specified but this feature is not supported.
+ */
+ public Query makeQuery (int size) throws Exception;
+
+ /** Create the next query */
+ public Query makeQuery () throws Exception;
+
+ /** Set the properties
+ * @throws Exception */
+ public void setConfig (Config config) throws Exception;
+
+ /** Reset inputs so that the test run would behave, input wise, as if it just started. */
+ public void resetInputs();
+
+ /** Print the queries */
+ public String printQueries();
+}
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java?view=diff&rev=524969&r1=524968&r2=524969
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java Mon Apr 2 16:37:14 2007
@@ -1,121 +1,121 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.benchmark.byTask.utils.Config;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.text.DateFormat;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.Locale;
-
-
-/**
- * A DocMaker using the Reuters collection for its input.
- *
- * Config properties:
- * docs.dir=<path to the docs dir| Default: reuters-out>
-
- *
- */
-public class ReutersDocMaker extends BasicDocMaker {
-
- private DateFormat dateFormat;
- private File dataDir = null;
- private ArrayList inputFiles = new ArrayList();
- private int nextFile = 0;
- private int iteration=0;
-
- /* (non-Javadoc)
- * @see SimpleDocMaker#setConfig(java.util.Properties)
- */
- public void setConfig(Config config) {
- super.setConfig(config);
- String d = config.get("docs.dir","reuters-out");
- dataDir = new File(new File("work"),d);
-
-
- collectFiles(dataDir,inputFiles);
- if (inputFiles.size()==0) {
- throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
- }
- // date format: 30-MAR-1987 14:22:36.87
- dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
- dateFormat.setLenient(true);
- }
-
- protected DocData getNextDocData() throws Exception {
- File f = null;
- String name = null;
- synchronized (this) {
- if (nextFile >= inputFiles.size()) {
- // exhausted files, start a new round, unless forever set to false.
- if (!forever) {
- throw new NoMoreDataException();
- }
- nextFile = 0;
- iteration++;
- }
- f = (File) inputFiles.get(nextFile++);
- name = f.getCanonicalPath()+"_"+iteration;
- }
-
- BufferedReader reader = new BufferedReader(new FileReader(f));
- String line = null;
- //First line is the date, 3rd is the title, rest is body
- String dateStr = reader.readLine();
- reader.readLine();//skip an empty line
- String title = reader.readLine();
- reader.readLine();//skip an empty line
- StringBuffer bodyBuf = new StringBuffer(1024);
- while ((line = reader.readLine()) != null) {
- bodyBuf.append(line).append(' ');
- }
- reader.close();
-
- addBytes(f.length());
-
-
- Date date = dateFormat.parse(dateStr.trim());
- return new DocData(name, bodyBuf.toString(), title, null, date);
- }
-
-
- /*
- * (non-Javadoc)
- * @see DocMaker#resetIinputs()
- */
- public synchronized void resetInputs() {
- super.resetInputs();
- nextFile = 0;
- iteration = 0;
- }
-
- /*
- * (non-Javadoc)
- * @see DocMaker#numUniqueTexts()
- */
- public int numUniqueTexts() {
- return inputFiles.size();
- }
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.benchmark.byTask.utils.Config;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.Locale;
+
+
+/**
+ * A DocMaker using the Reuters collection for its input.
+ *
+ * Config properties:
+ * docs.dir=<path to the docs dir| Default: reuters-out>
+ * The configured path is resolved under the "work" directory.
+ *
+ */
+public class ReutersDocMaker extends BasicDocMaker {
+
+ private DateFormat dateFormat;
+ private File dataDir = null;
+ private ArrayList inputFiles = new ArrayList();
+ private int nextFile = 0;
+ private int iteration=0;
+
+ /* (non-Javadoc)
+ * @see BasicDocMaker#setConfig(Config)
+ */
+ public void setConfig(Config config) {
+ super.setConfig(config);
+ String d = config.get("docs.dir","reuters-out");
+ dataDir = new File(new File("work"),d);
+
+
+ collectFiles(dataDir,inputFiles);
+ if (inputFiles.size()==0) {
+ throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
+ }
+ // date format: 30-MAR-1987 14:22:36.87
+ dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
+ dateFormat.setLenient(true);
+ }
+
+ protected DocData getNextDocData() throws Exception {
+ File f = null;
+ String name = null;
+ synchronized (this) {
+ if (nextFile >= inputFiles.size()) {
+ // exhausted files, start a new round, unless forever set to false.
+ if (!forever) {
+ throw new NoMoreDataException();
+ }
+ nextFile = 0;
+ iteration++;
+ }
+ f = (File) inputFiles.get(nextFile++);
+ name = f.getCanonicalPath()+"_"+iteration;
+ }
+ // NOTE(review): reader is not closed if one of the reads below throws (no try/finally).
+ BufferedReader reader = new BufferedReader(new FileReader(f));
+ String line = null;
+ //First line is the date, 3rd is the title, rest is body
+ String dateStr = reader.readLine();
+ reader.readLine();//skip an empty line
+ String title = reader.readLine();
+ reader.readLine();//skip an empty line
+ StringBuffer bodyBuf = new StringBuffer(1024);
+ while ((line = reader.readLine()) != null) {
+ bodyBuf.append(line).append(' ');
+ }
+ reader.close();
+
+ addBytes(f.length());
+ // NOTE(review): dateFormat is a shared SimpleDateFormat and is used here outside the
+ // synchronized block above; SimpleDateFormat is not thread-safe - confirm single-threaded use.
+ Date date = dateFormat.parse(dateStr.trim());
+ return new DocData(name, bodyBuf.toString(), title, null, date);
+ }
+
+
+ /*
+ * (non-Javadoc)
+ * @see DocMaker#resetInputs()
+ */
+ public synchronized void resetInputs() {
+ super.resetInputs();
+ nextFile = 0;
+ iteration = 0;
+ }
+
+ /*
+ * (non-Javadoc)
+ * @see DocMaker#numUniqueTexts()
+ */
+ public int numUniqueTexts() {
+ return inputFiles.size();
+ }
+
+}
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java?view=diff&rev=524969&r1=524968&r2=524969
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java Mon Apr 2 16:37:14 2007
@@ -1,117 +1,117 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.WildcardQuery;
-import org.apache.lucene.search.spans.SpanFirstQuery;
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
-
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.spans.SpanFirstQuery;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
-
-/**
- * A QueryMaker that makes queries devised manually (by Grant Ingersoll) for
- * searching in the Reuters collection.
- */
+
+/**
+ * A QueryMaker that makes queries devised manually (by Grant Ingersoll) for
+ * searching in the Reuters collection.
+ */
public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker {
- private static String [] STANDARD_QUERIES = {
- //Start with some short queries
- "Salomon", "Comex", "night trading", "Japan Sony",
- //Try some Phrase Queries
- "\"Sony Japan\"", "\"food needs\"~3",
- "\"World Bank\"^2 AND Nigeria", "\"World Bank\" -Nigeria",
- "\"Ford Credit\"~5",
- //Try some longer queries
- "airline Europe Canada destination",
- "Long term pressure by trade " +
- "ministers is necessary if the current Uruguay round of talks on " +
- "the General Agreement on Trade and Tariffs (GATT) is to " +
- "succeed"
- };
-
- private static Query[] getPrebuiltQueries(String field) {
- // be wary of unanalyzed text
- return new Query[] {
- new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 5),
- new SpanNearQuery(new SpanQuery[]{new SpanTermQuery(new Term(field, "night")), new SpanTermQuery(new Term(field, "trading"))}, 4, false),
- new SpanNearQuery(new SpanQuery[]{new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 10), new SpanTermQuery(new Term(field, "credit"))}, 10, false),
- new WildcardQuery(new Term(field, "fo*")),
- };
- }
-
- /**
- * Parse the strings containing Lucene queries.
- *
- * @param qs array of strings containing query expressions
- * @param a analyzer to use when parsing queries
- * @return array of Lucene queries
- */
- private static Query[] createQueries(List qs, Analyzer a) {
- QueryParser qp = new QueryParser("body", a);
- List queries = new ArrayList();
- for (int i = 0; i < qs.size(); i++) {
- try {
-
- Object query = qs.get(i);
- Query q = null;
- if (query instanceof String) {
- q = qp.parse((String) query);
-
- } else if (query instanceof Query) {
- q = (Query) query;
-
- } else {
- System.err.println("Unsupported Query Type: " + query);
- }
-
- if (q != null) {
- queries.add(q);
- }
-
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- return (Query[]) queries.toArray(new Query[0]);
- }
-
+ private static String [] STANDARD_QUERIES = {
+ //Start with some short queries
+ "Salomon", "Comex", "night trading", "Japan Sony",
+ //Try some Phrase Queries
+ "\"Sony Japan\"", "\"food needs\"~3",
+ "\"World Bank\"^2 AND Nigeria", "\"World Bank\" -Nigeria",
+ "\"Ford Credit\"~5",
+ //Try some longer queries
+ "airline Europe Canada destination",
+ "Long term pressure by trade " +
+ "ministers is necessary if the current Uruguay round of talks on " +
+ "the General Agreement on Trade and Tariffs (GATT) is to " +
+ "succeed"
+ };
+
+ private static Query[] getPrebuiltQueries(String field) {
+ // be wary of unanalyzed text
+ return new Query[] {
+ new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 5),
+ new SpanNearQuery(new SpanQuery[]{new SpanTermQuery(new Term(field, "night")), new SpanTermQuery(new Term(field, "trading"))}, 4, false),
+ new SpanNearQuery(new SpanQuery[]{new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 10), new SpanTermQuery(new Term(field, "credit"))}, 10, false),
+ new WildcardQuery(new Term(field, "fo*")),
+ };
+ }
+
+ /**
+ * Parse the strings containing Lucene queries; prebuilt Query objects are passed through as is.
+ *
+ * @param qs list of queries: Strings to be parsed against the "body" field, or prebuilt Query objects
+ * @param a analyzer to use when parsing queries
+ * @return array of Lucene queries; entries that fail to parse or are of an unsupported type are omitted
+ */
+ private static Query[] createQueries(List qs, Analyzer a) {
+ QueryParser qp = new QueryParser("body", a);
+ List queries = new ArrayList();
+ for (int i = 0; i < qs.size(); i++) {
+ try {
+
+ Object query = qs.get(i);
+ Query q = null;
+ if (query instanceof String) {
+ q = qp.parse((String) query);
+
+ } else if (query instanceof Query) {
+ q = (Query) query;
+
+ } else {
+ System.err.println("Unsupported Query Type: " + query);
+ }
+
+ if (q != null) {
+ queries.add(q);
+ }
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ return (Query[]) queries.toArray(new Query[0]);
+ }
+
 protected Query[] prepareQueries() throws Exception {
- // analyzer (default is standard analyzer)
- Analyzer anlzr= (Analyzer) Class.forName(config.get("analyzer",
- "org.apache.lucene.analysis.standard.StandardAnalyzer")).newInstance();
-
- List queryList = new ArrayList(20);
- queryList.addAll(Arrays.asList(STANDARD_QUERIES));
- queryList.addAll(Arrays.asList(getPrebuiltQueries("body")));
+ // analyzer (default is standard analyzer)
+ Analyzer anlzr= (Analyzer) Class.forName(config.get("analyzer",
+ "org.apache.lucene.analysis.standard.StandardAnalyzer")).newInstance();
+
+ List queryList = new ArrayList(20);
+ queryList.addAll(Arrays.asList(STANDARD_QUERIES));
+ queryList.addAll(Arrays.asList(getPrebuiltQueries("body")));
 return createQueries(queryList, anlzr);
- }
+ }
+
+
-
-
-}
+}
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java?view=diff&rev=524969&r1=524968&r2=524969
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java Mon Apr 2 16:37:14 2007
@@ -1,69 +1,69 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Create documents for the test.
- */
-public class SimpleDocMaker extends BasicDocMaker {
-
- private int docID = 0;
-
- static final String DOC_TEXT = // from a public first aid info at http://firstaid.ie.eu.org
- "Well it may be a little dramatic but sometimes it true. " +
- "If you call the emergency medical services to an incident, " +
- "your actions have started the chain of survival. " +
- "You have acted to help someone you may not even know. " +
- "First aid is helping, first aid is making that call, " +
- "putting a Band-Aid on a small wound, controlling bleeding in large " +
- "wounds or providing CPR for a collapsed person whose not breathing " +
- "and heart has stopped beating. You can help yourself, your loved " +
- "ones and the stranger whose life may depend on you being in the " +
- "right place at the right time with the right knowledge.";
-
- // return a new docid
- private synchronized int newdocid() {
- return docID++;
- }
-
- /*
- * (non-Javadoc)
- * @see DocMaker#resetIinputs()
- */
- public synchronized void resetInputs() {
- super.resetInputs();
- docID = 0;
- }
-
- /*
- * (non-Javadoc)
- * @see DocMaker#numUniqueTexts()
- */
- public int numUniqueTexts() {
- return 0; // not applicable
- }
-
- protected DocData getNextDocData() throws NoMoreDataException {
- if (docID>0 && !forever) {
- throw new NoMoreDataException();
- }
- addBytes(DOC_TEXT.length());
- return new DocData("doc"+newdocid(),DOC_TEXT, null, null, null);
- }
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Create documents for the test; every document carries the same fixed text (DOC_TEXT).
+ */
+public class SimpleDocMaker extends BasicDocMaker {
+
+ private int docID = 0;
+
+ static final String DOC_TEXT = // from a public first aid info at http://firstaid.ie.eu.org
+ "Well it may be a little dramatic but sometimes it true. " +
+ "If you call the emergency medical services to an incident, " +
+ "your actions have started the chain of survival. " +
+ "You have acted to help someone you may not even know. " +
+ "First aid is helping, first aid is making that call, " +
+ "putting a Band-Aid on a small wound, controlling bleeding in large " +
+ "wounds or providing CPR for a collapsed person whose not breathing " +
+ "and heart has stopped beating. You can help yourself, your loved " +
+ "ones and the stranger whose life may depend on you being in the " +
+ "right place at the right time with the right knowledge.";
+
+ // return a new docid (post-increments the shared counter under the instance lock)
+ private synchronized int newdocid() {
+ return docID++;
+ }
+
+ /*
+ * (non-Javadoc)
+ * @see DocMaker#resetInputs()
+ */
+ public synchronized void resetInputs() {
+ super.resetInputs();
+ docID = 0;
+ }
+
+ /*
+ * (non-Javadoc)
+ * @see DocMaker#numUniqueTexts()
+ */
+ public int numUniqueTexts() {
+ return 0; // not applicable
+ }
+
+ protected DocData getNextDocData() throws NoMoreDataException {
+ if (docID>0 && !forever) {
+ throw new NoMoreDataException();
+ }
+ addBytes(DOC_TEXT.length());
+ return new DocData("doc"+newdocid(),DOC_TEXT, null, null, null);
+ }
+
+}
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java
------------------------------------------------------------------------------
svn:eol-style = native