You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by do...@apache.org on 2011/10/09 20:01:37 UTC
svn commit: r1180674 - in /lucene/dev/trunk: ./ lucene/ lucene/contrib/
modules/benchmark/ modules/benchmark/conf/
modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/
modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ modul...
Author: doronc
Date: Sun Oct 9 18:01:36 2011
New Revision: 1180674
URL: http://svn.apache.org/viewvc?rev=1180674&view=rev
Log:
LUCENE-3262: Facet benchmarking - indexing support - ported from 3x.
Added:
lucene/dev/trunk/modules/benchmark/conf/facets.alg
- copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/conf/facets.alg
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java
- copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java
- copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java
- copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddFacetedDocTask.java
- copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddFacetedDocTask.java
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java
- copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java
- copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitTaxonomyIndexTask.java
- copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitTaxonomyIndexTask.java
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java
- copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java
- copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java
- copied unchanged from r1180637, lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java
Modified:
lucene/dev/trunk/ (props changed)
lucene/dev/trunk/lucene/ (props changed)
lucene/dev/trunk/lucene/contrib/contrib-build.xml
lucene/dev/trunk/modules/benchmark/CHANGES.txt
lucene/dev/trunk/modules/benchmark/build.xml
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
lucene/dev/trunk/solr/ (props changed)
Modified: lucene/dev/trunk/lucene/contrib/contrib-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/contrib-build.xml?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/contrib-build.xml (original)
+++ lucene/dev/trunk/lucene/contrib/contrib-build.xml Sun Oct 9 18:01:36 2011
@@ -141,6 +141,17 @@
<property name="analyzers-common.uptodate" value="true"/>
</target>
+ <property name="facet.jar" value="${common.dir}/../modules/facet/build/lucene-facet-${version}.jar"/>
+ <target name="check-facet-uptodate" unless="facet.uptodate">
+ <module-uptodate name="facet" jarfile="${facet.jar}" property="facet.uptodate"/>
+ </target>
+ <target name="jar-facet" unless="facet.uptodate" depends="check-facet-uptodate">
+ <ant dir="${common.dir}/../modules/facet" target="jar-core" inheritall="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ <property name="facet.uptodate" value="true"/>
+ </target>
+
<property name="analyzers-icu.jar" value="${common.dir}/../modules/analysis/build/icu/lucene-analyzers-icu-${version}.jar"/>
<target name="check-analyzers-icu-uptodate" unless="analyzers-icu.uptodate">
<module-uptodate name="analysis/icu" jarfile="${analyzers-icu.jar}" property="analyzers-icu.uptodate"/>
Modified: lucene/dev/trunk/modules/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/CHANGES.txt?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/CHANGES.txt (original)
+++ lucene/dev/trunk/modules/benchmark/CHANGES.txt Sun Oct 9 18:01:36 2011
@@ -5,6 +5,10 @@ The Benchmark contrib package contains c
For more information on past and future Lucene versions, please see:
http://s.apache.org/luceneversions
+10/07/2011
+ LUCENE-3262: Facet benchmarking - Benchmark tasks and sources were added for indexing
+ with facets, demonstrated in facets.alg. (Gilad Barkai, Doron Cohen)
+
09/25/2011
LUCENE-3457: Upgrade commons-compress to 1.2 (and undo LUCENE-2980's workaround).
(Doron Cohen)
Modified: lucene/dev/trunk/modules/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/build.xml?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/build.xml (original)
+++ lucene/dev/trunk/modules/benchmark/build.xml Sun Oct 9 18:01:36 2011
@@ -153,6 +153,7 @@
<pathelement path="${highlighter.jar}"/>
<pathelement path="${analyzers-common.jar}"/>
<pathelement path="${queryparser.jar}"/>
+ <pathelement path="${facet.jar}"/>
<path refid="base.classpath"/>
<fileset dir="lib">
<include name="**/*.jar"/>
@@ -241,7 +242,7 @@
<echo>Benchmark output in JIRA table format is in file: ${shingle.jira.output.file}</echo>
</target>
- <target name="init" depends="contrib-build.init,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser"/>
+ <target name="init" depends="contrib-build.init,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet"/>
<target name="clean-javacc">
<fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java Sun Oct 9 18:01:36 2011
@@ -24,6 +24,7 @@ import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
+import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.benchmark.byTask.stats.Points;
import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
@@ -31,12 +32,15 @@ import org.apache.lucene.benchmark.byTas
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.FileUtils;
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.IOUtils;
/**
* Data maintained by a performance test run.
@@ -45,11 +49,21 @@ import org.apache.lucene.store.RAMDirect
* <ul>
* <li>Configuration.
* <li>Directory, Writer, Reader.
- * <li>Docmaker and a few instances of QueryMaker.
+ * <li>Taxonomy Directory, Writer, Reader.
+ * <li>DocMaker, FacetSource and a few instances of QueryMaker.
* <li>Analyzer.
* <li>Statistics data which updated during the run.
* </ul>
- * Config properties: work.dir=<path to root of docs and index dirs| Default: work>
+ * Config properties:
+ * <ul>
+ * <li><b>work.dir</b>=<path to root of docs and index dirs| Default: work>
+ * <li><b>analyzer</b>=<class name for analyzer| Default: StandardAnalyzer>
+ * <li><b>doc.maker</b>=<class name for doc-maker| Default: DocMaker>
+ * <li><b>facet.source</b>=<class name for facet-source| Default: RandomFacetSource>
+ * <li><b>query.maker</b>=<class name for query-maker| Default: SimpleQueryMaker>
+ * <li><b>log.queries</b>=<whether queries should be printed| Default: false>
+ * <li><b>directory</b>=<type of directory to use for the index| Default: RAMDirectory>
+ * <li><b>taxonomy.directory</b>=<type of directory for taxonomy index| Default: RAMDirectory>
* </ul>
*/
public class PerfRunData {
@@ -62,7 +76,12 @@ public class PerfRunData {
private Directory directory;
private Analyzer analyzer;
private DocMaker docMaker;
+ private FacetSource facetSource;
private Locale locale;
+
+ private Directory taxonomyDir;
+ private TaxonomyWriter taxonomyWriter;
+ private TaxonomyReader taxonomyReader;
// we use separate (identical) instances for each "read" task type, so each can iterate the quries separately.
private HashMap<Class<? extends ReadTask>,QueryMaker> readTaskQueryMaker;
@@ -73,6 +92,7 @@ public class PerfRunData {
private IndexWriter indexWriter;
private Config config;
private long startTimeMillis;
+
// constructor
public PerfRunData (Config config) throws Exception {
@@ -84,6 +104,10 @@ public class PerfRunData {
docMaker = Class.forName(config.get("doc.maker",
"org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance();
docMaker.setConfig(config);
+ // facet source
+ facetSource = Class.forName(config.get("facet.source",
+ "org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance();
+ facetSource.setConfig(config);
// query makers
readTaskQueryMaker = new HashMap<Class<? extends ReadTask>,QueryMaker>();
qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker")).asSubclass(QueryMaker.class);
@@ -104,30 +128,17 @@ public class PerfRunData {
public void reinit(boolean eraseIndex) throws Exception {
// cleanup index
- if (indexWriter!=null) {
- indexWriter.close();
- indexWriter = null;
- }
- if (indexReader!=null) {
- indexReader.close();
- indexReader = null;
- }
- if (directory!=null) {
- directory.close();
- }
+ IOUtils.close(indexWriter, indexReader, directory);
+ indexWriter = null;
+ indexReader = null;
+
+ IOUtils.close(taxonomyWriter, taxonomyReader, taxonomyDir);
+ taxonomyWriter = null;
+ taxonomyReader = null;
// directory (default is ram-dir).
- if ("FSDirectory".equals(config.get("directory","RAMDirectory"))) {
- File workDir = new File(config.get("work.dir","work"));
- File indexDir = new File(workDir,"index");
- if (eraseIndex && indexDir.exists()) {
- FileUtils.fullyDelete(indexDir);
- }
- indexDir.mkdirs();
- directory = FSDirectory.open(indexDir);
- } else {
- directory = new RAMDirectory();
- }
+ directory = createDirectory(eraseIndex, "index", "directory");
+ taxonomyDir = createDirectory(eraseIndex, "taxo", "taxonomy.directory");
// inputs
resetInputs();
@@ -139,6 +150,21 @@ public class PerfRunData {
// Re-init clock
setStartTimeMillis();
}
+
+ private Directory createDirectory(boolean eraseIndex, String dirName,
+ String dirParam) throws IOException {
+ if ("FSDirectory".equals(config.get(dirParam,"RAMDirectory"))) {
+ File workDir = new File(config.get("work.dir","work"));
+ File indexDir = new File(workDir,dirName);
+ if (eraseIndex && indexDir.exists()) {
+ FileUtils.fullyDelete(indexDir);
+ }
+ indexDir.mkdirs();
+ return FSDirectory.open(indexDir);
+ }
+
+ return new RAMDirectory();
+ }
public long setStartTimeMillis() {
startTimeMillis = System.currentTimeMillis();
@@ -174,6 +200,57 @@ public class PerfRunData {
}
/**
+ * @return Returns the taxonomy directory
+ */
+ public Directory getTaxonomyDir() {
+ return taxonomyDir;
+ }
+
+ /**
+ * Set the taxonomy reader. Takes ownership of that taxonomy reader, that is,
+ * internally performs taxoReader.incRef(). If the caller no longer needs that
+ * reader, it should decRef()/close() it after calling this method; otherwise
+ * the reader will remain open.
+ * @param taxoReader The taxonomy reader to set.
+ */
+ public synchronized void setTaxonomyReader(TaxonomyReader taxoReader) throws IOException {
+ if (taxoReader == this.taxonomyReader) {
+ return;
+ }
+ if (taxonomyReader != null) {
+ taxonomyReader.decRef();
+ }
+
+ if (taxoReader != null) {
+ taxoReader.incRef();
+ }
+ this.taxonomyReader = taxoReader;
+ }
+
+ /**
+ * @return Returns the taxonomyReader. NOTE: this returns a
+ * reference. You must call TaxonomyReader.decRef() when
+ * you're done.
+ */
+ public synchronized TaxonomyReader getTaxonomyReader() {
+ if (taxonomyReader != null) {
+ taxonomyReader.incRef();
+ }
+ return taxonomyReader;
+ }
+
+ /**
+ * @param taxoWriter The taxonomy writer to set.
+ */
+ public void setTaxonomyWriter(TaxonomyWriter taxoWriter) {
+ this.taxonomyWriter = taxoWriter;
+ }
+
+ public TaxonomyWriter getTaxonomyWriter() {
+ return taxonomyWriter;
+ }
+
+ /**
* @return Returns the indexReader. NOTE: this returns a
* reference. You must call IndexReader.decRef() when
* you're done.
@@ -198,13 +275,22 @@ public class PerfRunData {
}
/**
+ * Set the index reader. Takes ownership of that index reader, that is,
+ * internally performs indexReader.incRef(). If the caller no longer needs that
+ * reader, it should decRef()/close() it after calling this method; otherwise
+ * the reader will remain open.
* @param indexReader The indexReader to set.
*/
public synchronized void setIndexReader(IndexReader indexReader) throws IOException {
+ if (indexReader == this.indexReader) {
+ return;
+ }
+
if (this.indexReader != null) {
// Release current IR
this.indexReader.decRef();
}
+
this.indexReader = indexReader;
if (indexReader != null) {
// Hold reference to new IR
@@ -246,6 +332,11 @@ public class PerfRunData {
return docMaker;
}
+ /** Returns the facet source. */
+ public FacetSource getFacetSource() {
+ return facetSource;
+ }
+
/**
* @return the locale
*/
@@ -269,6 +360,7 @@ public class PerfRunData {
public void resetInputs() throws IOException {
docMaker.resetInputs();
+ facetSource.resetInputs();
for (final QueryMaker queryMaker : readTaskQueryMaker.values()) {
queryMaker.resetInputs();
}
Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java Sun Oct 9 18:01:36 2011
@@ -17,12 +17,7 @@ package org.apache.lucene.benchmark.byTa
* limitations under the License.
*/
-import java.io.File;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-
-import org.apache.lucene.benchmark.byTask.utils.Config;
/**
* Represents content from a specified source, such as TREC, Reuters etc. A
@@ -31,119 +26,13 @@ import org.apache.lucene.benchmark.byTas
* of various statistics, such as how many documents were generated, size in
* bytes etc.
* <p>
- * Supports the following configuration parameters:
- * <ul>
- * <li><b>content.source.forever</b> - specifies whether to generate documents
- * forever (<b>default=true</b>).
- * <li><b>content.source.verbose</b> - specifies whether messages should be
- * output by the content source (<b>default=false</b>).
- * <li><b>content.source.encoding</b> - specifies which encoding to use when
- * reading the files of that content source. Certain implementations may define
- * a default value if this parameter is not specified. (<b>default=null</b>).
- * <li><b>content.source.log.step</b> - specifies for how many documents a
- * message should be logged. If set to 0 it means no logging should occur.
- * <b>NOTE:</b> if verbose is set to false, logging should not occur even if
- * logStep is not 0 (<b>default=0</b>).
- * </ul>
+ * For supported configuration parameters see {@link ContentItemsSource}.
*/
-public abstract class ContentSource {
-
- private long bytesCount;
- private long totalBytesCount;
- private int docsCount;
- private int totalDocsCount;
- private Config config;
-
- protected boolean forever;
- protected int logStep;
- protected boolean verbose;
- protected String encoding;
-
- /** update count of bytes generated by this source */
- protected final synchronized void addBytes(long numBytes) {
- bytesCount += numBytes;
- totalBytesCount += numBytes;
- }
-
- /** update count of documents generated by this source */
- protected final synchronized void addDoc() {
- ++docsCount;
- ++totalDocsCount;
- }
-
- /**
- * A convenience method for collecting all the files of a content source from
- * a given directory. The collected {@link File} instances are stored in the
- * given <code>files</code>.
- */
- protected final void collectFiles(File dir, ArrayList<File> files) {
- if (!dir.canRead()) {
- return;
- }
-
- File[] dirFiles = dir.listFiles();
- Arrays.sort(dirFiles);
- for (int i = 0; i < dirFiles.length; i++) {
- File file = dirFiles[i];
- if (file.isDirectory()) {
- collectFiles(file, files);
- } else if (file.canRead()) {
- files.add(file);
- }
- }
- }
-
- /**
- * Returns true whether it's time to log a message (depending on verbose and
- * the number of documents generated).
- */
- protected final boolean shouldLog() {
- return verbose && logStep > 0 && docsCount % logStep == 0;
- }
-
- /** Called when reading from this content source is no longer required. */
- public abstract void close() throws IOException;
+public abstract class ContentSource extends ContentItemsSource {
- /** Returns the number of bytes generated since last reset. */
- public final long getBytesCount() { return bytesCount; }
-
- /** Returns the number of generated documents since last reset. */
- public final int getDocsCount() { return docsCount; }
-
- public final Config getConfig() { return config; }
-
- /** Returns the next {@link DocData} from the content source. */
+ /** Returns the next {@link DocData} from the content source.
+ * Implementations must account for multi-threading, as multiple threads
+ * can call this method simultaneously. */
public abstract DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException;
- /** Returns the total number of bytes that were generated by this source. */
- public final long getTotalBytesCount() { return totalBytesCount; }
-
- /** Returns the total number of generated documents. */
- public final int getTotalDocsCount() { return totalDocsCount; }
-
- /**
- * Resets the input for this content source, so that the test would behave as
- * if it was just started, input-wise.
- * <p>
- * <b>NOTE:</b> the default implementation resets the number of bytes and
- * documents generated since the last reset, so it's important to call
- * super.resetInputs in case you override this method.
- */
- public void resetInputs() throws IOException {
- bytesCount = 0;
- docsCount = 0;
- }
-
- /**
- * Sets the {@link Config} for this content source. If you override this
- * method, you must call super.setConfig.
- */
- public void setConfig(Config config) {
- this.config = config;
- forever = config.get("content.source.forever", true);
- logStep = config.get("content.source.log.step", 0);
- verbose = config.get("content.source.verbose", false);
- encoding = config.get("content.source.encoding", null);
- }
-
}
Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java Sun Oct 9 18:01:36 2011
@@ -31,7 +31,6 @@ import java.text.SimpleDateFormat;
import java.text.ParsePosition;
import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.benchmark.byTask.utils.Format;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -186,13 +185,8 @@ public class DocMaker {
protected boolean reuseFields;
protected boolean indexProperties;
- private int lastPrintedNumUniqueTexts = 0;
-
- private long lastPrintedNumUniqueBytes = 0;
private final AtomicInteger numDocsCreated = new AtomicInteger();
- private int printNum = 0;
-
public DocMaker() {
}
@@ -400,38 +394,9 @@ public class DocMaker {
return doc;
}
- public void printDocStatistics() {
- boolean print = false;
- String col = " ";
- StringBuilder sb = new StringBuilder();
- String newline = System.getProperty("line.separator");
- sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline);
- int nut = source.getTotalDocsCount();
- if (nut > lastPrintedNumUniqueTexts) {
- print = true;
- sb.append("total count of unique texts: ").append(Format.format(0,nut,col)).append(newline);
- lastPrintedNumUniqueTexts = nut;
- }
- long nub = getTotalBytesCount();
- if (nub > lastPrintedNumUniqueBytes) {
- print = true;
- sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline);
- lastPrintedNumUniqueBytes = nub;
- }
- if (source.getDocsCount() > 0) {
- print = true;
- sb.append("num docs added since last inputs reset: ").append(Format.format(0,source.getDocsCount(),col)).append(newline);
- sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getBytesCount(),col)).append(newline);
- }
- if (print) {
- System.out.println(sb.append(newline).toString());
- printNum++;
- }
- }
-
/** Reset inputs so that the test run would behave, input wise, as if it just started. */
public synchronized void resetInputs() throws IOException {
- printDocStatistics();
+ source.printStatistics("docs");
// re-initiate since properties by round may have changed.
setConfig(config);
source.resetInputs();
Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java Sun Oct 9 18:01:36 2011
@@ -289,7 +289,7 @@ public class TrecContentSource extends C
// here, everything else is already private to that thread, so we're safe.
try {
docData = trecDocParser.parse(docData, name, this, docBuf, parsedPathType);
- addDoc();
+ addItem();
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);
}
Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java Sun Oct 9 18:01:36 2011
@@ -17,12 +17,14 @@ package org.apache.lucene.benchmark.byTa
* limitations under the License.
*/
+import java.text.NumberFormat;
+
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.document.Document;
/**
- * Add a document, optionally with of a certain size.
+ * Add a document, optionally of a certain size.
* <br>Other side effects: none.
* <br>Takes optional param: document size.
*/
@@ -34,9 +36,12 @@ public class AddDocTask extends PerfTask
private int docSize = 0;
- // volatile data passed between setup(), doLogic(), tearDown().
- private Document doc = null;
-
+ /**
+ * Volatile data passed between setup(), doLogic(), tearDown().
+ * The doc is created at setup() and added at doLogic().
+ */
+ protected Document doc = null;
+
@Override
public void setup() throws Exception {
super.setup();
@@ -56,7 +61,7 @@ public class AddDocTask extends PerfTask
@Override
protected String getLogMessage(int recsCount) {
- return "added " + recsCount + " docs";
+ return String.format("added %9d docs",recsCount);
}
@Override
Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java Sun Oct 9 18:01:36 2011
@@ -17,8 +17,6 @@ package org.apache.lucene.benchmark.byTa
* limitations under the License.
*/
-import java.text.NumberFormat;
-
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.stats.Points;
import org.apache.lucene.benchmark.byTask.stats.TaskStats;
@@ -270,9 +268,7 @@ public abstract class PerfTask implement
public void tearDown() throws Exception {
if (++logStepCount % logStep == 0) {
double time = (System.currentTimeMillis() - runData.getStartTimeMillis()) / 1000.0;
- NumberFormat nf = NumberFormat.getInstance();
- nf.setMaximumFractionDigits(2);
- System.out.println(nf.format(time) + " sec --> "
+ System.out.println(String.format("%7.2f",time) + " sec --> "
+ Thread.currentThread().getName() + " " + getLogMessage(logStepCount));
}
}
Modified: lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=1180674&r1=1180673&r2=1180674&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Sun Oct 9 18:01:36 2011
@@ -40,6 +40,7 @@ import org.apache.lucene.benchmark.byTas
import org.apache.lucene.collation.CollationKeyAnalyzer;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@@ -781,6 +782,42 @@ public class TestPerfTasksLogic extends
}
/**
+ * Test indexing with facets tasks.
+ */
+ public void testIndexingWithFacets() throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "content.source.log.step=100",
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "doc.stored=false",
+ "merge.factor=3",
+ "doc.tokenized=false",
+ "debug.level=1",
+ "# ----- alg ",
+ "ResetSystemErase",
+ "CreateIndex",
+ "CreateTaxonomyIndex",
+ "{ \"AddDocs\" AddFacetedDoc > : * ",
+ "CloseIndex",
+ "CloseTaxonomyIndex",
+ "OpenTaxonomyReader",
+ };
+
+ // 2. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+ PerfRunData runData = benchmark.getRunData();
+ assertNull("taxo writer was not properly closed",runData.getTaxonomyWriter());
+ TaxonomyReader taxoReader = runData.getTaxonomyReader();
+ assertNotNull("taxo reader was not opened", taxoReader);
+ assertTrue("nothing was added to the taxnomy (expecting root and at least one addtional category)",taxoReader.getSize()>1);
+ taxoReader.close();
+ }
+
+ /**
* Test that we can call optimize(maxNumSegments).
*/
public void testOptimizeMaxNumSegments() throws Exception {