You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by do...@apache.org on 2007/07/27 22:24:53 UTC
svn commit: r560372 [1/2] - in /lucene/java/trunk: ./ contrib/benchmark/
contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/
contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/
contrib/benchmark/src/java/org/apache/lucene/b...
Author: doronc
Date: Fri Jul 27 13:24:52 2007
New Revision: 560372
URL: http://svn.apache.org/viewvc?view=rev&rev=560372
Log:
LUCENE-836: Add support for search quality benchmarking.
Added:
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/Judge.java (with props)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java (with props)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java (with props)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQueryParser.java (with props)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java (with props)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html (with props)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java (with props)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java (with props)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html (with props)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java (with props)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java (with props)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java (with props)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java (with props)
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html (with props)
lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/
lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java (with props)
lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecQRels.txt (with props)
lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt (with props)
Modified:
lucene/java/trunk/common-build.xml
lucene/java/trunk/contrib/benchmark/CHANGES.txt
lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
Modified: lucene/java/trunk/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/java/trunk/common-build.xml?view=diff&rev=560372&r1=560371&r2=560372
==============================================================================
--- lucene/java/trunk/common-build.xml (original)
+++ lucene/java/trunk/common-build.xml Fri Jul 27 13:24:52 2007
@@ -284,6 +284,8 @@
</copy>
</target>
+ <property name="tests.verbose" value="false"/>
+
<target name="test" depends="compile-test" description="Runs unit tests">
<fail unless="junit.present">
##################################################################
@@ -299,6 +301,10 @@
<assertions>
<enable package="org.apache.lucene"/>
</assertions>
+
+ <!-- allow tests to control debug prints -->
+ <sysproperty key="tests.verbose" value="${tests.verbose}"/>
+
<!-- TODO: create propertyset for test properties, so each project can have its own set -->
<sysproperty key="dataDir" file="src/test"/>
<sysproperty key="tempDir" file="${build.dir}/test"/>
Modified: lucene/java/trunk/contrib/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/CHANGES.txt?view=diff&rev=560372&r1=560371&r2=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/benchmark/CHANGES.txt Fri Jul 27 13:24:52 2007
@@ -4,6 +4,14 @@
$Id:$
+7/27/07
+ LUCENE-836: Add support for search quality benchmarking, running
+ a set of queries against a searcher, and, optionally produce a submission
+ report, and, if query judgements are available, compute quality measures:
+ recall, precision_at_N, average_precision, MAP. TREC specific Judge (based
+ on TREC QRels) and TREC Topics reader are included in o.a.l.benchmark.quality.trec
+ but any other format of queries and judgements can be implemented and used.
+
7/24/07
LUCENE-947: Add support for creating and index "one document per
line" from a large text file, which reduces per-document overhead of
Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/Judge.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/Judge.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/Judge.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/Judge.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality;
+
+import java.io.PrintWriter;
+
+/**
+ * Judge if a document is relevant for a quality query.
+ */
+public interface Judge {
+
+  /**
+   * Decides whether the document named <code>docName</code> is relevant to a quality query.
+   * @param docName name of the document whose relevance is being tested.
+   * @param query the quality query the document is tested against.
+   * @return true when the document is relevant, false otherwise.
+   */
+  boolean isRelevant(String docName, QualityQuery query);
+
+  /**
+   * Checks that the given queries and this Judge correspond to one another.
+   * A perfect match means this Judge holds some data for every one of the
+   * input quality queries, and holds no data for any other quality query.
+   * <b>Note</b>: an imperfect match does not make the quality benchmark run
+   * fail; only a warning message is logged.
+   * @param qq quality queries to be validated.
+   * @param logger if not null, validation issues are logged.
+   * @return true if perfectly valid, false if not.
+   */
+  boolean validateData(QualityQuery[] qq, PrintWriter logger);
+
+  /**
+   * Returns the maximal recall for the given quality query, that is,
+   * the number of relevant docs this Judge "knows" for that query.
+   * @param query the query whose maximal recall is needed.
+   */
+  int maxRecall(QualityQuery query);
+
+}
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/Judge.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/Judge.java
------------------------------------------------------------------------------
svn:executable = *
Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+
+import org.apache.lucene.benchmark.quality.utils.DocNameExtractor;
+import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.TopDocs;
+
+/**
+ * Main entry point for running a quality benchmark.
+ * <p>
+ * There are two main configurations for running a quality benchmark: <ul>
+ * <li>Against existing judgements.</li>
+ * <li>For submission (e.g. for a contest).</li>
+ * </ul>
+ * The first configuration requires a non null
+ * {@link org.apache.lucene.benchmark.quality.Judge Judge}.
+ * The second configuration requires a non null
+ * {@link org.apache.lucene.benchmark.quality.utils.SubmissionReport SubmissionLogger}.
+ */
+public class QualityBenchmark {
+
+  /** Quality Queries that this quality benchmark would execute. */
+  protected QualityQuery qualityQueries[];
+
+  /** Parser for turning QualityQueries into Lucene Queries. */
+  protected QualityQueryParser qqParser;
+
+  /** Index to be searched. */
+  protected Searcher searcher;
+
+  /** Index field to extract doc name for each search result; used for judging the results. */
+  protected String docNameField;
+
+  /**
+   * Create a QualityBenchmark.
+   * @param qqs quality queries to run.
+   * @param qqParser parser for turning QualityQueries into Lucene Queries.
+   * @param searcher index to be searched.
+   * @param docNameField name of field containing the document name.
+   *        This allows to extract the doc name for search results,
+   *        and is important for judging the results.
+   */
+  public QualityBenchmark(QualityQuery qqs[], QualityQueryParser qqParser,
+      Searcher searcher, String docNameField) {
+    this.qualityQueries = qqs;
+    this.qqParser = qqParser;
+    this.searcher = searcher;
+    this.docNameField = docNameField;
+  }
+
+  /**
+   * Run the quality benchmark.
+   * @param maxResults how many results to collect for each quality query.
+   * @param judge the judge that can tell if a certain result doc is relevant for a certain quality query.
+   *        If null, no judgements would be made. Usually null for a submission run.
+   * @param submitRep submission report is created if non null.
+   * @param qualityLog If not null, quality run data would be printed for each query.
+   * @return QualityStats of each quality query that was executed; entries are null when <code>judge</code> is null.
+   * @throws Exception if quality benchmark failed to run.
+   */
+  public QualityStats [] execute(int maxResults, Judge judge, SubmissionReport submitRep,
+      PrintWriter qualityLog) throws Exception {
+    QualityStats stats[] = new QualityStats[qualityQueries.length];
+    for (int i=0; i<qualityQueries.length; i++) {
+      QualityQuery qq = qualityQueries[i];
+      // generate query
+      Query q = qqParser.parse(qq);
+      // search with this query
+      long t1 = System.currentTimeMillis();
+      TopDocs td = searcher.search(q,null,maxResults);
+      long searchTime = System.currentTimeMillis()-t1;
+      // most likely we either submit or judge, but check both
+      if (judge!=null) {
+        stats[i] = analyzeQueryResults(qq, q, td, judge, qualityLog, searchTime);
+      }
+      if (submitRep!=null) {
+        submitRep.report(qq,td,docNameField,searcher);
+      }
+    }
+    return stats;
+  }
+
+  /* Analyze/judge results for a single quality query; optionally log them. */
+  private QualityStats analyzeQueryResults(QualityQuery qq, Query q, TopDocs td, Judge judge, PrintWriter logger, long searchTime) throws IOException {
+    QualityStats stts = new QualityStats(judge.maxRecall(qq),searchTime);
+    ScoreDoc sd[] = td.scoreDocs;
+    long t1 = System.currentTimeMillis(); // for the first doc name we also measure construction of the doc name extractor, just in case.
+    DocNameExtractor xt = new DocNameExtractor(docNameField);
+    for (int i=0; i<sd.length; i++) {
+      String docName = xt.docName(searcher,sd[i].doc);
+      long docNameExtractTime = System.currentTimeMillis() - t1;
+      boolean isRelevant = judge.isRelevant(docName,qq);
+      stts.addResult(i+1,isRelevant, docNameExtractTime);
+      t1 = System.currentTimeMillis(); // restart only after judging, so judge time is not counted as doc name extraction
+    }
+    if (logger!=null) {
+      logger.println(qq.getQueryID()+" - "+q);
+      stts.log(qq.getQueryID()+" Stats:",1,logger," ");
+    }
+    return stts;
+  }
+
+}
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality;
+
+import java.util.Map;
+
+/**
+ * A QualityQuery has an ID and some name-value pairs.
+ * <p>
+ * The ID allows to map the quality query with its judgements.
+ * <p>
+ * The name-value pairs are used by a
+ * {@link org.apache.lucene.benchmark.quality.QualityQueryParser}
+ * to create a Lucene {@link org.apache.lucene.search.Query}.
+ * <p>
+ * It is very likely that name-value-pairs would be mapped into fields in a Lucene query,
+ * but it is up to the QualityQueryParser how to map - e.g. all values in a single field,
+ * or each pair as its own field, etc., - and this of course must match the way the
+ * searched index was constructed.
+ */
+public class QualityQuery implements Comparable {
+  private final String queryID;
+  private final Map nameValPairs;
+
+  /**
+   * Create a QualityQuery with given ID and name-value pairs.
+   * @param queryID ID of this quality query.
+   * @param nameValPairs the contents of this quality query.
+   */
+  public QualityQuery(String queryID, Map nameValPairs) {
+    this.queryID = queryID;
+    this.nameValPairs = nameValPairs;
+  }
+
+  /**
+   * Return all the names of name-value-pairs in this QualityQuery.
+   */
+  public String[] getNames() {
+    return (String[]) nameValPairs.keySet().toArray(new String[0]);
+  }
+
+  /**
+   * Return the value of a certain name-value pair.
+   * @param name the name whose value should be returned.
+   */
+  public String getValue(String name) {
+    return (String) nameValPairs.get(name);
+  }
+
+  /**
+   * Return the ID of this query.
+   * The ID allows to map the quality query with its judgements.
+   */
+  public String getQueryID() {
+    return queryID;
+  }
+
+  /* Orders queries for a nicer sort of the input queries before running them:
+   * numerically when both ids parse as ints, otherwise by string comparison of the ids. */
+  public int compareTo(Object o) {
+    QualityQuery other = (QualityQuery) o;
+    try {
+      // compare as ints when both ids are ints
+      int n = Integer.parseInt(queryID);
+      int nOther = Integer.parseInt(other.queryID);
+      return n<nOther ? -1 : (n==nOther ? 0 : 1); // explicit comparison: n - nOther can overflow and flip the sign
+    } catch (NumberFormatException e) {
+      // fall back to string comparison
+      return queryID.compareTo(other.queryID);
+    }
+  }
+
+}
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java
------------------------------------------------------------------------------
svn:executable = *
Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQueryParser.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQueryParser.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQueryParser.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality;
+
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.search.Query;
+
+/**
+ * Parse a QualityQuery into a Lucene query.
+ */
+public interface QualityQueryParser {
+
+  /**
+   * Turns the given QualityQuery into a Lucene query.
+   * @param qq the quality query to be parsed.
+   * @throws ParseException if parsing failed.
+   */
+  Query parse(QualityQuery qq) throws ParseException;
+
+}
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQueryParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQueryParser.java
------------------------------------------------------------------------------
svn:executable = *
Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,266 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality;
+
+import java.io.PrintWriter;
+import java.text.NumberFormat;
+import java.util.ArrayList;
+
+/**
+ * Results of quality benchmark run for a single query or for a set of queries.
+ */
+public class QualityStats {
+
+  /** Number of points for which precision is computed. */
+  public static final int MAX_POINTS = 20;
+
+  private double maxGoodPoints;
+  private double recall;
+  private double pAt[];
+  private double pReleventSum = 0;
+  private double numPoints = 0;
+  private double numGoodPoints = 0;
+  private long searchTime;
+  private long docNamesExtractTime;
+
+  /**
+   * A certain rank in which a relevant doc was found.
+   */
+  public static class RecallPoint {
+    private int rank;
+    private double recall;
+    private RecallPoint(int rank, double recall) {
+      this.rank = rank;
+      this.recall = recall;
+    }
+    /** Returns the rank: where on the list of returned docs this relevant doc appeared. */
+    public int getRank() {
+      return rank;
+    }
+    /** Returns the recall: how many relevant docs were returned up to this point, inclusive. */
+    public double getRecall() {
+      return recall;
+    }
+  }
+
+  private ArrayList recallPoints;
+
+  /**
+   * Construct a QualityStats object with anticipated maximal number of relevant hits.
+   * @param maxGoodPoints maximal possible relevant hits.
+   * @param searchTime time the search took, in milliseconds.
+   */
+  public QualityStats(double maxGoodPoints, long searchTime) {
+    this.maxGoodPoints = maxGoodPoints;
+    this.searchTime = searchTime;
+    this.recallPoints = new ArrayList();
+    pAt = new double[MAX_POINTS+1]; // pAt[0] unused.
+  }
+
+  /**
+   * Add a (possibly relevant) doc.
+   * @param n rank of the added doc (its ordinal position within the query results).
+   * @param isRelevant true if the added doc is relevant, false otherwise.
+   * @param docNameExtractTime time it took to extract the name of this doc, in milliseconds.
+   * @throws IllegalArgumentException if <code>n</code> is not the rank right after the last added one.
+   */
+  public void addResult(int n, boolean isRelevant, long docNameExtractTime) {
+    if (Math.abs(numPoints+1 - n) > 1E-6) {
+      throw new IllegalArgumentException("point "+n+" illegal after "+numPoints+" points!");
+    }
+    if (isRelevant) {
+      numGoodPoints+=1;
+      recallPoints.add(new RecallPoint(n,numGoodPoints));
+    }
+    numPoints = n;
+    double p = numGoodPoints / numPoints;
+    if (isRelevant) {
+      pReleventSum += p;
+    }
+    if (n<pAt.length) {
+      pAt[n] = p;
+    }
+    recall = maxGoodPoints<=0 ? p : numGoodPoints/maxGoodPoints;
+    docNamesExtractTime += docNameExtractTime;
+  }
+
+  /**
+   * Return the precision at rank n:
+   * |{relevant hits within first <code>n</code> hits}| / <code>n</code>.
+   * @param n requested precision point, must be at least 1 and at most {@link #MAX_POINTS}.
+   * @throws IllegalArgumentException if <code>n</code> is outside that range.
+   */
+  public double getPrecisionAt(int n) {
+    if (n<1 || n>MAX_POINTS) {
+      throw new IllegalArgumentException("n="+n+" - but it must be in [1,"+MAX_POINTS+"] range!");
+    }
+    if (n>numPoints) {
+      return (numPoints * pAt[(int)numPoints])/n;
+    }
+    return pAt[n];
+  }
+
+  /**
+   * Return the average precision at recall points: sum of precision at recall points / maxGoodPoints.
+   */
+  public double getAvp() {
+    return maxGoodPoints==0 ? 0 : pReleventSum/maxGoodPoints;
+  }
+
+  /**
+   * Return the recall: |{relevant hits found}| / maxGoodPoints (the precision, if maxGoodPoints is not positive).
+   */
+  public double getRecall() {
+    return recall;
+  }
+
+  /**
+   * Log information on this QualityStats object.
+   * @param title title line printed before the stats; skipped if null or blank.
+   * @param paddLines number of empty lines printed before and after the stats.
+   * @param logger Logger.
+   * @param prefix prefix before each log line.
+   */
+  public void log(String title, int paddLines, PrintWriter logger, String prefix) {
+    for (int i=0; i<paddLines; i++) {
+      logger.println();
+    }
+    if (title!=null && title.trim().length()>0) {
+      logger.println(title);
+    }
+    prefix = prefix==null ? "" : prefix;
+    NumberFormat nf = NumberFormat.getInstance(java.util.Locale.US); // fixed locale: fracFormat() expects '.' as decimal separator
+    nf.setMaximumFractionDigits(3);
+    nf.setMinimumFractionDigits(3);
+    nf.setGroupingUsed(true);
+    int M = 19;
+    logger.println(prefix+format("Search Seconds: ",M)+
+        fracFormat(nf.format((double)searchTime/1000)));
+    logger.println(prefix+format("DocName Seconds: ",M)+
+        fracFormat(nf.format((double)docNamesExtractTime/1000)));
+    logger.println(prefix+format("Num Points: ",M)+
+        fracFormat(nf.format(numPoints)));
+    logger.println(prefix+format("Num Good Points: ",M)+
+        fracFormat(nf.format(numGoodPoints)));
+    logger.println(prefix+format("Max Good Points: ",M)+
+        fracFormat(nf.format(maxGoodPoints)));
+    logger.println(prefix+format("Average Precision: ",M)+
+        fracFormat(nf.format(getAvp())));
+    logger.println(prefix+format("Recall: ",M)+
+        fracFormat(nf.format(getRecall())));
+    for (int i=1; i<=(int)numPoints && i<pAt.length; i++) { // inclusive, so precision at the last rank is reported too
+      logger.println(prefix+format("Precision At "+i+": ",M)+
+          fracFormat(nf.format(getPrecisionAt(i))));
+    }
+    for (int i=0; i<paddLines; i++) {
+      logger.println();
+    }
+  }
+
+  // Alignment helpers pad with spaces on demand instead of slicing a fixed-length padding constant.
+  private String format(String s, int minLen) {
+    StringBuffer sb = new StringBuffer(s==null ? "" : s);
+    while (sb.length()<minLen) { sb.append(' '); } // right-pad to minLen; longer strings are kept whole
+    return sb.toString();
+  }
+  private String fracFormat(String frac) {
+    int k = frac.indexOf('.');
+    if (k<0) { k = frac.length(); } // no decimal point: the whole string is the integer part
+    StringBuffer sb = new StringBuffer();
+    for (int i=k; i<6; i++) { sb.append(' '); } // left-pad so the integer part takes at least 6 chars
+    return sb.append(frac).toString();
+  }
+
+  /**
+   * Create a QualityStats object that is the average of the input QualityStats objects.
+   * @param stats array of input stats to be averaged.
+   * @return an average over the input stats.
+   */
+  public static QualityStats average(QualityStats[] stats) {
+    QualityStats avg = new QualityStats(0,0);
+    int m = 0; // queries with positive judgements
+    // aggregate
+    for (int i=0; i<stats.length; i++) {
+      avg.searchTime += stats[i].searchTime;
+      avg.docNamesExtractTime += stats[i].docNamesExtractTime;
+      if (stats[i].maxGoodPoints>0) {
+        m++;
+        avg.numGoodPoints += stats[i].numGoodPoints;
+        avg.numPoints += stats[i].numPoints;
+        avg.pReleventSum += stats[i].getAvp();
+        avg.recall += stats[i].recall;
+        avg.maxGoodPoints += stats[i].maxGoodPoints;
+        for (int j=1; j<avg.pAt.length; j++) {
+          avg.pAt[j] += stats[i].getPrecisionAt(j);
+        }
+      }
+    }
+    assert m>0 : "Fishy: no \"good\" queries!";
+    // take average: times go by all queries, other measures go by "good" queries only.
+    avg.searchTime /= stats.length;
+    avg.docNamesExtractTime /= stats.length;
+    avg.numGoodPoints /= m;
+    avg.numPoints /= m;
+    avg.recall /= m;
+    avg.maxGoodPoints /= m;
+    for (int j=1; j<avg.pAt.length; j++) {
+      avg.pAt[j] /= m;
+    }
+    avg.pReleventSum /= m; // this is actually avgp now
+    avg.pReleventSum *= avg.maxGoodPoints; // so that getAvp() would be correct
+
+    return avg;
+  }
+
+  /**
+   * Returns the time it took to extract doc names for judging the measured query, in milliseconds.
+   */
+  public long getDocNamesExtractTime() {
+    return docNamesExtractTime;
+  }
+
+  /**
+   * Returns the maximal number of good points.
+   * This is the number of relevant docs known by the judge for the measured query.
+   */
+  public double getMaxGoodPoints() {
+    return maxGoodPoints;
+  }
+
+  /** Returns the number of good points (only relevant points). */
+  public double getNumGoodPoints() {
+    return numGoodPoints;
+  }
+
+  /** Returns the number of points (both relevant and irrelevant points). */
+  public double getNumPoints() {
+    return numPoints;
+  }
+
+  /** Returns the recall points: one entry per relevant doc that was added, in the order added. */
+  public RecallPoint [] getRecallPoints() {
+    return (RecallPoint[]) recallPoints.toArray(new RecallPoint[0]);
+  }
+
+  /**
+   * Returns the search time in milliseconds for the measured query.
+   */
+  public long getSearchTime() {
+    return searchTime;
+  }
+
+}
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html Fri Jul 27 13:24:52 2007
@@ -0,0 +1,65 @@
+<html>
+<body>
+<h2>Search Quality Benchmarking.</h2>
+<p>
+This package allows benchmarking the search quality of a Lucene application.
+<p>
+In order to use this package you should provide:
+<ul>
+ <li>A <a href="../../search/Searcher.html">searcher</a>.</li>
+ <li><a href="QualityQuery.html">Quality queries</a>.</li>
+ <li><a href="Judge.html">Judging object</a>.</li>
+ <li><a href="utils/SubmissionReport.html">Reporting object</a>.</li>
+</ul>
+<p>
+For benchmarking TREC collections with TREC QRels, take a look at the
+<a href="trec/package-summary.html">trec package</a>.
+<p>
+Here is sample code that runs the TREC 2006 queries 701-850 on the .Gov2 collection:
+
+<pre>
+ File topicsFile = new File("topics-701-850.txt");
+ File qrelsFile = new File("qrels-701-850.txt");
+ Searcher searcher = new IndexSearcher("index");
+
+ int maxResults = 1000;
+ String docNameField = "docname";
+
+ PrintWriter logger = new PrintWriter(System.out,true);
+
+ // use trec utilities to read trec topics into quality queries
+ TrecTopicsReader qReader = new TrecTopicsReader();
+ QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile)));
+
+ // prepare judge, with trec utilities that read from a QRels file
+ Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile)));
+
+ // validate topics & judgments match each other
+ judge.validateData(qqs, logger);
+
+ // set the parsing of quality queries into Lucene queries.
+ QualityQueryParser qqParser = new SimpleQQParser("title", "body");
+
+ // run the benchmark
+ QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
+ SubmissionReport submitLog = null;
+ QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
+
+ // print an average summary of the results
+ QualityStats avg = QualityStats.average(stats);
+ avg.log("SUMMARY",2,logger, " ");
+</pre>
+
+<p>
+Some immediate ways to modify this program to your needs are:
+<ul>
+ <li>To run on different formats of queries and judgements provide your own
+ <a href="Judge.html">Judge</a> and
+ <a href="QualityQuery.html">Quality queries</a>.</li>
+ <li>Create sophisticated Lucene queries by supplying a different
+ <a href="QualityQueryParser.html">Quality query parser</a>.</li>
+</ul>
+
+</body>
+
+</html>
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html
------------------------------------------------------------------------------
svn:executable = *
Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,158 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.trec;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.StringTokenizer;
+
+import org.apache.lucene.benchmark.quality.Judge;
+import org.apache.lucene.benchmark.quality.QualityQuery;
+
+/**
+ * Judge if given document is relevant to given quality query, based on Trec format for judgements.
+ */
+public class TrecJudge implements Judge {
+
+ HashMap judgements;
+
+ /**
+ * Constructor from a reader.
+ * <p>
+ * Expected input format:
+ * <pre>
+ * qnum 0 doc-name is-relevant
+ * </pre>
+ * Two sample lines:
+ * <pre>
+ * 19 0 doc303 1
+ * 19 0 doc7295 0
+ * </pre>
+ * @param reader where judgments are read from.
+ * @throws IOException
+ */
+ public TrecJudge (BufferedReader reader) throws IOException {
+ judgements = new HashMap();
+ QRelJudgement curr = null;
+ String zero = "0";
+ String line;
+
+ try {
+ while (null!=(line=reader.readLine())) {
+ line = line.trim();
+ if (line.length()==0 || '#'==line.charAt(0)) {
+ continue;
+ }
+ StringTokenizer st = new StringTokenizer(line);
+ String queryID = st.nextToken();
+ st.nextToken();
+ String docName = st.nextToken();
+ boolean relevant = !zero.equals(st.nextToken());
+ assert !st.hasMoreTokens() : "wrong format: "+line+" next: "+st.nextToken();
+ if (relevant) { // only keep relevant docs
+ if (curr==null || !curr.queryID.equals(queryID)) {
+ curr = (QRelJudgement)judgements.get(queryID);
+ if (curr==null) {
+ curr = new QRelJudgement(queryID);
+ judgements.put(queryID,curr);
+ }
+ }
+ curr.addRelevandDoc(docName);
+ }
+ }
+ } finally {
+ reader.close();
+ }
+ }
+
+ // inherit javadocs
+ public boolean isRelevant(String docName, QualityQuery query) {
+ QRelJudgement qrj = (QRelJudgement) judgements.get(query.getQueryID());
+ return qrj!=null && qrj.isRelevant(docName);
+ }
+
+ /** single Judgement of a trec quality query */
+ private static class QRelJudgement {
+ private String queryID;
+ private HashMap relevantDocs;
+
+ QRelJudgement(String queryID) {
+ this.queryID = queryID;
+ relevantDocs = new HashMap();
+ }
+
+ public void addRelevandDoc(String docName) {
+ relevantDocs.put(docName,docName);
+ }
+
+ boolean isRelevant(String docName) {
+ return relevantDocs.containsKey(docName);
+ }
+
+ public int maxRecall() {
+ return relevantDocs.size();
+ }
+ }
+
+ // inherit javadocs
+ public boolean validateData(QualityQuery[] qq, PrintWriter logger) {
+ HashMap missingQueries = (HashMap) judgements.clone();
+ ArrayList missingJudgements = new ArrayList();
+ for (int i=0; i<qq.length; i++) {
+ String id = qq[i].getQueryID();
+ if (missingQueries.containsKey(id)) {
+ missingQueries.remove(id);
+ } else {
+ missingJudgements.add(id);
+ }
+ }
+ boolean isValid = true;
+ if (missingJudgements.size()>0) {
+ isValid = false;
+ if (logger!=null) {
+ logger.println("WARNING: "+missingJudgements.size()+" queries have no judgments! - ");
+ for (int i=0; i<missingJudgements.size(); i++) {
+ logger.println(" "+(String)missingJudgements.get(i));
+ }
+ }
+ }
+ if (missingQueries.size()>0) {
+ isValid = false;
+ if (logger!=null) {
+ logger.println("WARNING: "+missingQueries.size()+" judgments match no query! - ");
+ for (Iterator it = missingQueries.keySet().iterator(); it.hasNext();) {
+ String id = (String) it.next();
+ logger.println(" "+id);
+ }
+ }
+ }
+ return isValid;
+ }
+
+ // inherit javadocs
+ public int maxRecall(QualityQuery query) {
+ QRelJudgement qrj = (QRelJudgement) judgements.get(query.getQueryID());
+ if (qrj!=null) {
+ return qrj.maxRecall();
+ }
+ return 0;
+ }
+}
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.trec;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+
+import org.apache.lucene.benchmark.quality.QualityQuery;
+
+/**
+ * Read TREC topics.
+ * <p>
+ * Expects this topic format -
+ * <pre>
+ * <top>
+ * <num> Number: nnn
+ *
+ * <title> title of the topic
+ *
+ * <desc> Description:
+ * description of the topic
+ *
+ * <narr> Narrative:
+ * "story" composed by assessors.
+ *
+ * </top>
+ * </pre>
+ * Comment lines starting with '#' are ignored.
+ */
+ public class TrecTopicsReader {
+
+   private static final String newline = System.getProperty("line.separator");
+
+   /**
+    * Constructor for Trec's TopicsReader
+    */
+   public TrecTopicsReader() {
+     super();
+   }
+
+   /**
+    * Read quality queries from trec format topics file.
+    * <p>
+    * Each topic yields a quality query whose ID is the text after the ':' on the
+    * &lt;num&gt; line, with a "title" value taken from the &lt;title&gt; line and a
+    * "description" value made of the lines between the &lt;desc&gt; and &lt;narr&gt; lines.
+    * The reader is closed before returning, also when reading fails.
+    * @param reader where queries are read from.
+    * @return the result quality queries, sorted.
+    * @throws IOException if cannot read the queries, or if the input ends in the
+    *   middle of a topic, before its &lt;narr&gt; line was found.
+    */
+   public QualityQuery[] readQueries(BufferedReader reader) throws IOException {
+     ArrayList res = new ArrayList();
+     try {
+       // a null here means there is no further topic - the regular end of input
+       while (null!=read(reader,"<top>",null,false,false)) {
+         HashMap fields = new HashMap();
+         // id
+         StringBuffer sb = readRequired(reader,"<num>",true,false);
+         int k = sb.indexOf(":");
+         String id = sb.substring(k+1).trim();
+         // title
+         sb = readRequired(reader,"<title>",true,false);
+         k = sb.indexOf(">");
+         String title = sb.substring(k+1).trim();
+         // description: skip to the <desc> line, then collect everything up to the <narr> line
+         readRequired(reader,"<desc>",false,false);
+         sb = readRequired(reader,"<narr>",false,true);
+         String description = sb.toString().trim();
+         // we got a topic!
+         fields.put("title",title);
+         fields.put("description",description);
+         QualityQuery topic = new QualityQuery(id,fields);
+         res.add(topic);
+         // skip narrative, get to end of doc (a missing closing tag on the last topic is tolerated)
+         read(reader,"</top>",null,false,false);
+       }
+     } finally {
+       reader.close();
+     }
+     // sort result array (by ID)
+     QualityQuery qq[] = (QualityQuery[]) res.toArray(new QualityQuery[0]);
+     Arrays.sort(qq);
+     return qq;
+   }
+
+   // same as read(), but the end of input is an error: the topic is incomplete
+   private StringBuffer readRequired (BufferedReader reader, String prefix, boolean collectMatchLine, boolean collectAll) throws IOException {
+     StringBuffer sb = read(reader,prefix,null,collectMatchLine,collectAll);
+     if (sb==null) {
+       throw new IOException("unexpected end of topics input, no line starts with: "+prefix);
+     }
+     return sb;
+   }
+
+   // read until finding a line that starts with the specified prefix.
+   // returns null if the input ends before such a line is found.
+   // collectMatchLine: append the matching line to the result.
+   // collectAll: append every line read before the matching line to the result.
+   // collected lines are separated by the platform line separator.
+   private StringBuffer read (BufferedReader reader, String prefix, StringBuffer sb, boolean collectMatchLine, boolean collectAll) throws IOException {
+     sb = (sb==null ? new StringBuffer() : sb);
+     String sep = "";
+     while (true) {
+       String line = reader.readLine();
+       if (line==null) {
+         return null;
+       }
+       if (line.startsWith(prefix)) {
+         if (collectMatchLine) {
+           sb.append(sep).append(line);
+           sep = newline;
+         }
+         break;
+       }
+       if (collectAll) {
+         sb.append(sep).append(line);
+         sep = newline;
+       }
+     }
+     return sb;
+   }
+ }
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html Fri Jul 27 13:24:52 2007
@@ -0,0 +1,6 @@
+<html>
+<body>
+Utilities for Trec related quality benchmarking, feeding from Trec Topics and QRels inputs.
+</body>
+
+</html>
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html
------------------------------------------------------------------------------
svn:executable = *
Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.utils;
+
+import java.io.IOException;
+
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.document.FieldSelectorResult;
+import org.apache.lucene.search.Searcher;
+
+/**
+ * Utility: extract doc names from an index
+ */
+ public class DocNameExtractor {
+
+   private FieldSelector fldSel;
+   private String docNameField;
+
+   /**
+    * Create an extractor that reads the doc name from the given field.
+    * @param docNameField name of the stored field containing the doc name.
+    */
+   public DocNameExtractor (final String docNameField) {
+     this.docNameField = docNameField;
+     // selector that loads the doc name field only
+     fldSel = new FieldSelector() {
+       public FieldSelectorResult accept(String fieldName) {
+         if (fieldName.equals(docNameField)) {
+           return FieldSelectorResult.LOAD_AND_BREAK;
+         }
+         return FieldSelectorResult.NO_LOAD;
+       }
+     };
+   }
+
+   /**
+    * Fetch the name of a doc from the index.
+    * @param searcher access to the index.
+    * @param docid ID of doc whose name is needed.
+    * @return the value of the doc name field of that doc, as returned by the index.
+    * @throws IOException if cannot extract the doc name from the index.
+    */
+   public String docName(Searcher searcher, int docid) throws IOException {
+     return searcher.doc(docid,fldSel).get(docNameField);
+   }
+
+ }
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java
------------------------------------------------------------------------------
svn:executable = *
Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.utils;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.PriorityQueue;
+
+/**
+ * Suggest Quality queries based on an index contents.
+ * Utility class, used for making quality test benchmarks.
+ */
+ public class QualityQueriesFinder {
+
+   private static final String newline = System.getProperty("line.separator");
+   private Directory dir;
+
+   /**
+    * Constructor over a directory containing the index.
+    * @param dir directory containing the index we search for the quality test.
+    */
+   private QualityQueriesFinder(Directory dir) {
+     this.dir = dir;
+   }
+
+   /**
+    * Print suggested queries, formatted as trec topics, for the "body" field of an index.
+    * @param args {index-dir}
+    * @throws IOException if cannot access the index.
+    */
+   public static void main(String[] args) throws IOException {
+     if (args.length<1) {
+       System.err.println("Usage: java QualityQueriesFinder <index-dir>");
+       System.exit(1);
+     }
+     QualityQueriesFinder qqf = new QualityQueriesFinder(FSDirectory.getDirectory(new File(args[0])));
+     String q[] = qqf.bestQueries("body",20);
+     for (int i=0; i<q.length; i++) {
+       System.out.println(newline+formatQueryAsTrecTopic(i,q[i],null,null));
+     }
+   }
+
+   /**
+    * Build queries of four words each out of the selected terms of a field.
+    * @param field index field whose terms are used.
+    * @param numQueries maximal number of queries to create.
+    * @return the queries; fewer than numQueries if not enough terms qualified.
+    */
+   private String [] bestQueries(String field,int numQueries) throws IOException {
+     // honor the requested field (it used to be ignored in favor of a hard-coded "body")
+     String words[] = bestTerms(field,4*numQueries);
+     int n = words.length;
+     int m = n/4;
+     String res[] = new String[m];
+     // each query takes one word from each quarter of the df-ordered list
+     for (int i=0; i<res.length; i++) {
+       res[i] = words[i] + " " + words[m+i]+ " " + words[n-1-m-i] + " " + words[n-1-i];
+     }
+     return res;
+   }
+
+   /** Format a query as a trec topic; null parts are written as empty text. */
+   private static String formatQueryAsTrecTopic (int qnum, String title, String description, String narrative) {
+     return
+       "<top>" + newline +
+       "<num> Number: " + qnum + newline + newline +
+       "<title> " + (title==null?"":title) + newline + newline +
+       "<desc> Description:" + newline +
+       (description==null?"":description) + newline + newline +
+       "<narr> Narrative:" + newline +
+       (narrative==null?"":narrative) + newline + newline +
+       "</top>";
+   }
+
+   /**
+    * Select terms of a field whose document frequency is below a tenth of maxDoc.
+    * @param field index field whose terms are examined.
+    * @param numTerms size of the queue used for selecting the terms.
+    * @return the selected terms, in the order popped from the queue (lowest df first).
+    */
+   private String [] bestTerms(String field,int numTerms) throws IOException {
+     PriorityQueue pq = new TermsDfQueue(numTerms);
+     IndexReader ir = IndexReader.open(dir);
+     try {
+       int threshold = ir.maxDoc() / 10; // ignore words too common.
+       TermEnum terms = ir.terms(new Term(field,""));
+       try {
+         // the enum is already positioned on its first term, so that term
+         // must be examined before next() is called - otherwise it is skipped.
+         do {
+           Term term = terms.term();
+           if (term==null || !field.equals(term.field())) {
+             break; // no more terms, or moved on to another field
+           }
+           int df = terms.docFreq();
+           if (df<threshold) {
+             pq.insert(new TermDf(term.text(),df));
+           }
+         } while (terms.next());
+       } finally {
+         terms.close();
+       }
+     } finally {
+       ir.close();
+     }
+     String res[] = new String[pq.size()];
+     int i = 0;
+     while (pq.size()>0) {
+       TermDf tdf = (TermDf) pq.pop();
+       res[i++] = tdf.word;
+       System.out.println(i+". word: "+tdf.df+" "+tdf.word);
+     }
+     return res;
+   }
+
+   /** A term text together with its document frequency. */
+   private static class TermDf {
+     String word;
+     int df;
+     TermDf (String word, int freq) {
+       this.word = word;
+       this.df = freq;
+     }
+   }
+
+   /** Queue of TermDf entries ordered by document frequency. */
+   private static class TermsDfQueue extends PriorityQueue {
+     TermsDfQueue (int maxSize) {
+       initialize(maxSize);
+     }
+     protected boolean lessThan(Object a, Object b) {
+       TermDf tf1 = (TermDf) a;
+       TermDf tf2 = (TermDf) b;
+       return tf1.df < tf2.df;
+     }
+   }
+
+ }
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java
------------------------------------------------------------------------------
svn:executable = *
Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.utils;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.benchmark.quality.QualityQuery;
+import org.apache.lucene.benchmark.quality.QualityQueryParser;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.Query;
+
+/**
+ * Simplistic quality query parser. A Lucene query is created by passing
+ * the value of the specified QualityQuery name-value pair into
+ * a Lucene's QueryParser using StandardAnalyzer. */
+ public class SimpleQQParser implements QualityQueryParser {
+
+   private String qqName;
+   private String indexField;
+   // holds one QueryParser per calling thread, created on first use
+   ThreadLocal queryParser = new ThreadLocal();
+
+   /**
+    * Constructor of a simple qq parser.
+    * @param qqName name-value pair of quality query to use for creating the query
+    * @param indexField corresponding index field
+    */
+   public SimpleQQParser(String qqName, String indexField) {
+     this.qqName = qqName;
+     this.indexField = indexField;
+   }
+
+   /* (non-Javadoc)
+    * @see org.apache.lucene.benchmark.quality.QualityQueryParser#parse(org.apache.lucene.benchmark.quality.QualityQuery)
+    */
+   public Query parse(QualityQuery qq) throws ParseException {
+     QueryParser parser;
+     Object cached = queryParser.get();
+     if (cached!=null) {
+       parser = (QueryParser) cached;
+     } else {
+       // first call on this thread: create the parser and keep it for later calls
+       parser = new QueryParser(indexField, new StandardAnalyzer());
+       queryParser.set(parser);
+     }
+     String queryText = qq.getValue(qqName);
+     return parser.parse(queryText);
+   }
+
+ }
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java
------------------------------------------------------------------------------
svn:executable = *
Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.utils;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.text.NumberFormat;
+
+import org.apache.lucene.benchmark.quality.QualityQuery;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.TopDocs;
+
+/**
+ * Create a log ready for submission.
+ * Extend this class and override
+ * {@link #report(QualityQuery, TopDocs, String, Searcher)}
+ * to create different reports.
+ */
+ public class SubmissionReport {
+
+   private NumberFormat nf;
+   private PrintWriter logger;
+
+   /**
+    * Constructor for SubmissionReport.
+    * @param logger if null, no submission data is created.
+    */
+   public SubmissionReport (PrintWriter logger) {
+     this.logger = logger;
+     // fixed locale, so that scores are always written with a '.' decimal
+     // separator, whatever the default locale of the machine is.
+     nf = NumberFormat.getInstance(java.util.Locale.US);
+     nf.setMaximumFractionDigits(4);
+     nf.setMinimumFractionDigits(4);
+   }
+
+   /**
+    * Report a search result for a certain quality query.
+    * One line is written per result: query ID, the constant 0, doc name,
+    * zero-based rank, and score. Nothing is written if the logger is null.
+    * @param qq quality query for which the results are reported.
+    * @param td search results for the query.
+    * @param docNameField stored field used for fetching the result doc name.
+    * @param searcher index access for fetching doc name.
+    * @throws IOException in case of a problem.
+    */
+   public void report(QualityQuery qq, TopDocs td, String docNameField, Searcher searcher) throws IOException {
+     if (logger==null) {
+       return;
+     }
+     ScoreDoc sd[] = td.scoreDocs;
+     String sep = " \t ";
+     DocNameExtractor xt = new DocNameExtractor(docNameField);
+     for (int i=0; i<sd.length; i++) {
+       String docName = xt.docName(searcher,sd[i].doc);
+       logger.println(
+         qq.getQueryID() + sep +
+         '0' + sep +
+         format(docName,20) + sep +
+         format(""+i,7) + sep +
+         nf.format(sd[i].score)
+       );
+     }
+   }
+
+   /**
+    * Right-pad s with spaces up to minLen chars. A null s is treated as empty;
+    * an s that is already minLen chars or longer is returned unchanged.
+    */
+   private String format(String s, int minLen) {
+     StringBuffer sb = new StringBuffer(s==null ? "" : s);
+     // pad char by char: correct for any minLen, with no dependency on the
+     // length of a fixed padding string.
+     while (sb.length()<minLen) {
+       sb.append(' ');
+     }
+     return sb.toString();
+   }
+ }
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html (added)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html Fri Jul 27 13:24:52 2007
@@ -0,0 +1,6 @@
+<html>
+<body>
+Miscellaneous utilities for search quality benchmarking: query parsing, submission reports.
+</body>
+
+</html>
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html
------------------------------------------------------------------------------
svn:executable = *
Modified: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?view=diff&rev=560372&r1=560371&r2=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Fri Jul 27 13:24:52 2007
@@ -23,6 +23,9 @@
import java.io.BufferedReader;
import org.apache.lucene.benchmark.byTask.Benchmark;
+import org.apache.lucene.benchmark.byTask.feeds.DocData;
+import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
+import org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker;
import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
@@ -135,8 +138,8 @@
// 1. alg definition (required in every "logic" test)
String algLines[] = {
"# ----- properties ",
- "doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker",
- "doc.add.log.step=2697",
+ "doc.maker="+Reuters20DocMaker.class.getName(),
+ "doc.add.log.step=3",
"doc.term.vector=false",
"doc.maker.forever=false",
"directory=FSDirectory",
@@ -153,7 +156,7 @@
// 3. test number of docs in the index
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
- int ndocsExpected = 21578; // that's how many docs there are in the Reuters collecton.
+ int ndocsExpected = 20; // Reuters20DocMaker exhausts after 20 docs.
assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
ir.close();
}
@@ -221,7 +224,7 @@
}
// create the benchmark and execute it.
- private Benchmark execBenchmark(String[] algLines) throws Exception {
+ public static Benchmark execBenchmark(String[] algLines) throws Exception {
String algText = algLinesToText(algLines);
logTstLogic(algText);
Benchmark benchmark = new Benchmark(new StringReader(algText));
@@ -230,7 +233,7 @@
}
// catenate alg lines to make the alg text
- private String algLinesToText(String[] algLines) {
+ private static String algLinesToText(String[] algLines) {
String indent = " ";
StringBuffer sb = new StringBuffer();
for (int i = 0; i < propLines.length; i++) {
@@ -242,11 +245,22 @@
return sb.toString();
}
- private void logTstLogic (String txt) {
+ private static void logTstLogic (String txt) {
if (!DEBUG)
return;
System.out.println("Test logic of:");
System.out.println(txt);
}
+ /** use reuters and the exhaust mechanism, but to be faster, add 20 docs only... */
+ public static class Reuters20DocMaker extends ReutersDocMaker {
+ private int nDocs=0;
+ protected DocData getNextDocData() throws Exception {
+ if (nDocs>=20 && !forever) {
+ throw new NoMoreDataException();
+ }
+ nDocs++;
+ return super.getNextDocData();
+ }
+ }
}
Added: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java?view=auto&rev=560372
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java (added)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java Fri Jul 27 13:24:52 2007
@@ -0,0 +1,174 @@
+package org.apache.lucene.benchmark.quality;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.PrintWriter;
+
+import org.apache.lucene.benchmark.byTask.TestPerfTasksLogic;
+import org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker;
+import org.apache.lucene.benchmark.quality.Judge;
+import org.apache.lucene.benchmark.quality.QualityQuery;
+import org.apache.lucene.benchmark.quality.QualityQueryParser;
+import org.apache.lucene.benchmark.quality.QualityBenchmark;
+import org.apache.lucene.benchmark.quality.trec.TrecJudge;
+import org.apache.lucene.benchmark.quality.trec.TrecTopicsReader;
+import org.apache.lucene.benchmark.quality.utils.SimpleQQParser;
+import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.store.FSDirectory;
+
+import junit.framework.TestCase;
+
+/**
+ * Test that quality run does its job.
+ */
+public class TestQualityRun extends TestCase {
+
+ private static boolean DEBUG = Boolean.getBoolean("tests.verbose");
+
+ /**
+ * @param name name of the test case, passed on to the TestCase constructor
+ */
+ public TestQualityRun(String name) {
+ super(name);
+ }
+
+ public void testTrecQuality() throws Exception {
+ // end-to-end quality run: first create the complete reuters index, then run the topics against it and check the stats
+ createReutersIndex();
+ // work dir is taken from the "benchmark.work.dir" system property (default "work") and must already exist
+ File workDir = new File(System.getProperty("benchmark.work.dir","work"));
+ assertTrue("Bad workDir: "+workDir, workDir.exists()&& workDir.isDirectory());
+
+ int maxResults = 1000;
+ String docNameField = "docid";
+ // logging (and the submission report further down) is enabled only when DEBUG is set; otherwise both are null
+ PrintWriter logger = DEBUG ? new PrintWriter(System.out,true) : null;
+
+ // <tests src dir> for topics/qrels files - src/test/org/apache/lucene/benchmark/quality, resolved next to workDir (under its parent dir)
+ File srcTestDir = new File(new File(new File(new File(new File(
+ new File(new File(workDir.getAbsoluteFile().getParentFile(),
+ "src"),"test"),"org"),"apache"),"lucene"),"benchmark"),"quality");
+
+ // prepare topics (NOTE(review): the FileReader opened here is not closed in this method - confirm readQueries() closes it)
+ File topicsFile = new File(srcTestDir, "trecTopics.txt");
+ assertTrue("Bad topicsFile: "+topicsFile, topicsFile.exists()&& topicsFile.isFile());
+ TrecTopicsReader qReader = new TrecTopicsReader();
+ QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile)));
+
+ // prepare judge (NOTE(review): the FileReader opened here is not closed in this method - confirm TrecJudge closes it)
+ File qrelsFile = new File(srcTestDir, "trecQRels.txt");
+ assertTrue("Bad qrelsFile: "+qrelsFile, qrelsFile.exists()&& qrelsFile.isFile());
+ Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile)));
+
+ // validate topics & judgments match each other
+ judge.validateData(qqs, logger);
+ // NOTE(review): the searcher over <workDir>/index is never closed in this method
+ IndexSearcher searcher = new IndexSearcher(FSDirectory.getDirectory(new File(workDir,"index")));
+
+ QualityQueryParser qqParser = new SimpleQQParser("title","body");
+ QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
+
+ SubmissionReport submitLog = DEBUG ? new SubmissionReport(logger) : null;
+ QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
+
+ // --------- verify by the way judgments were altered for this test:
+ // for some queries, depending on m = i % 8, i being the index into stats[] (presumably mirrors the query number - confirm against trecQRels.txt)
+ // m==0: avg_precision and recall are hurt, by marking fake docs as relevant
+ // m==1: precision_at_n and avg_precision are hurt, by unmarking relevant docs
+ // m==2: all of avg_precision, precision_at_n and recall are hurt.
+ // m>=3: these queries remain perfect
+ for (int i = 0; i < stats.length; i++) {
+ QualityStats s = stats[i];
+ switch (i%8) {
+
+ case 0:
+ assertTrue("avg-p should be hurt: "+s.getAvp(), 1.0 > s.getAvp());
+ assertTrue("recall should be hurt: "+s.getRecall(), 1.0 > s.getRecall());
+ for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
+ assertEquals("p_at_"+j+" should be perfect: "+s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-9);
+ }
+ break;
+
+ case 1:
+ assertTrue("avg-p should be hurt", 1.0 > s.getAvp());
+ assertEquals("recall should be perfect: "+s.getRecall(), 1.0, s.getRecall(), 1E-9);
+ for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
+ assertTrue("p_at_"+j+" should be hurt: "+s.getPrecisionAt(j), 1.0 > s.getPrecisionAt(j));
+ }
+ break;
+
+ case 2:
+ assertTrue("avg-p should be hurt: "+s.getAvp(), 1.0 > s.getAvp());
+ assertTrue("recall should be hurt: "+s.getRecall(), 1.0 > s.getRecall());
+ for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
+ assertTrue("p_at_"+j+" should be hurt: "+s.getPrecisionAt(j), 1.0 > s.getPrecisionAt(j));
+ }
+ break;
+
+ default: {
+ assertEquals("avg-p should be perfect: "+s.getAvp(), 1.0, s.getAvp(), 1E-9);
+ assertEquals("recall should be perfect: "+s.getRecall(), 1.0, s.getRecall(), 1E-9);
+ for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
+ assertEquals("p_at_"+j+" should be perfect: "+s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-9);
+ }
+ }
+
+ }
+ }
+
+ QualityStats avg = QualityStats.average(stats);
+ if (logger!=null) {
+ avg.log("Average statistis:",1,logger," ");
+ }
+ // the averages over all queries are expected to be hurt as well: each measure must be below 1.0
+ assertTrue("mean avg-p should be hurt: "+avg.getAvp(), 1.0 > avg.getAvp());
+ assertTrue("avg recall should be hurt: "+avg.getRecall(), 1.0 > avg.getRecall());
+ for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
+ assertTrue("avg p_at_"+j+" should be hurt: "+avg.getPrecisionAt(j), 1.0 > avg.getPrecisionAt(j));
+ }
+
+
+ }
+
+ // use benchmark logic to create the full Reuters index: the alg lines below are handed to TestPerfTasksLogic.execBenchmark()
+ private void createReutersIndex() throws Exception {
+ // 1. alg definition: property lines first, then the task sequence
+ String algLines[] = {
+ "# ----- properties ",
+ "doc.maker="+ReutersDocMaker.class.getName(),
+ "doc.add.log.step=2500",
+ "doc.term.vector=false",
+ "doc.maker.forever=false",
+ "directory=FSDirectory",
+ "doc.stored=true",
+ "doc.tokenized=true",
+ "# ----- alg ",
+ "ResetSystemErase",
+ "CreateIndex",
+ "{ AddDoc } : *",
+ "CloseIndex",
+ };
+
+ // 2. execute the algorithm (required in every "logic" test)
+ TestPerfTasksLogic.execBenchmark(algLines);
+ }
+}
Propchange: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
------------------------------------------------------------------------------
svn:eol-style = native