You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by rm...@apache.org on 2010/02/21 12:40:36 UTC

svn commit: r912333 - in /lucene/java/trunk/contrib/benchmark: build.xml src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java src/test/org/apache/lucene/benchmark/quality/reuters.578.lines.txt.bz2

Author: rmuir
Date: Sun Feb 21 11:40:36 2010
New Revision: 912333

URL: http://svn.apache.org/viewvc?rev=912333&view=rev
Log:
LUCENE-2269: don't download/extract 20,000 files for the quality test

Added:
    lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/reuters.578.lines.txt.bz2   (with props)
Modified:
    lucene/java/trunk/contrib/benchmark/build.xml
    lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java

Modified: lucene/java/trunk/contrib/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/build.xml?rev=912333&r1=912332&r2=912333&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/build.xml (original)
+++ lucene/java/trunk/contrib/benchmark/build.xml Sun Feb 21 11:40:36 2010
@@ -250,11 +250,6 @@
       </subant>
     </target>
 
-    <target name="init" depends="common.init,compile-demo,compile-memory,compile-highlighter,compile-vector-highlighter,check-files"/>
-
-    <!-- make sure online collections (reuters) are first downloaded -->
-    <target name="test" depends="init,get-files">
-      <antcall target="common.test" inheritRefs="true" />
-    </target>
+    <target name="init" depends="common.init,compile-demo,compile-memory,compile-highlighter,compile-vector-highlighter"/>
     
 </project>

Modified: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java?rev=912333&r1=912332&r2=912333&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java Sun Feb 21 11:40:36 2010
@@ -20,10 +20,11 @@
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.io.PrintWriter;
 
 import org.apache.lucene.benchmark.byTask.TestPerfTasksLogic;
-import org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource;
 import org.apache.lucene.benchmark.quality.Judge;
 import org.apache.lucene.benchmark.quality.QualityQuery;
 import org.apache.lucene.benchmark.quality.QualityQueryParser;
@@ -39,6 +40,10 @@
 
 /**
  * Test that quality run does its job.
+ * <p>
+ * NOTE: if the default scoring or StandardAnalyzer is changed, then
+ * this test will not work correctly, as it does not dynamically
+ * generate its test trec topics/qrels!
  */
 public class TestQualityRun extends TestCase {
 
@@ -52,14 +57,14 @@
   }
 
   public void testTrecQuality() throws Exception {
-    // first create the complete reuters index
+    // first create the partial reuters index
     createReutersIndex();
     
     File workDir = new File(System.getProperty("benchmark.work.dir","work"));
     assertTrue("Bad workDir: "+workDir, workDir.exists()&& workDir.isDirectory());
 
     int maxResults = 1000;
-    String docNameField = "docid"; 
+    String docNameField = "doctitle"; // orig docID is in the linedoc format title 
     
     PrintWriter logger = DEBUG ? new PrintWriter(System.out,true) : null;
 
@@ -105,13 +110,13 @@
         assertTrue("avg-p should be hurt: "+s.getAvp(), 1.0 > s.getAvp());
         assertTrue("recall should be hurt: "+s.getRecall(), 1.0 > s.getRecall());
         for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
-          assertEquals("p_at_"+j+" should be perfect: "+s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-9);
+          assertEquals("p_at_"+j+" should be perfect: "+s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-2);
         }
         break;
       
       case 1:
         assertTrue("avg-p should be hurt", 1.0 > s.getAvp());
-        assertEquals("recall should be perfect: "+s.getRecall(), 1.0, s.getRecall(), 1E-9);
+        assertEquals("recall should be perfect: "+s.getRecall(), 1.0, s.getRecall(), 1E-2);
         for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
           assertTrue("p_at_"+j+" should be hurt: "+s.getPrecisionAt(j), 1.0 > s.getPrecisionAt(j));
         }
@@ -126,10 +131,10 @@
         break;
 
       default: {
-        assertEquals("avg-p should be perfect: "+s.getAvp(), 1.0, s.getAvp(), 1E-9);
-        assertEquals("recall should be perfect: "+s.getRecall(), 1.0, s.getRecall(), 1E-9);
+        assertEquals("avg-p should be perfect: "+s.getAvp(), 1.0, s.getAvp(), 1E-2);
+        assertEquals("recall should be perfect: "+s.getRecall(), 1.0, s.getRecall(), 1E-2);
         for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
-          assertEquals("p_at_"+j+" should be perfect: "+s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-9);
+          assertEquals("p_at_"+j+" should be perfect: "+s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-2);
         }
       }
       
@@ -150,24 +155,12 @@
     
   }
   
-  public void testTrecTopicsReader() throws Exception {
-    File workDir = new File(System.getProperty("benchmark.work.dir","work"));
-    assertTrue("Bad workDir: " + workDir, 
-        workDir.exists() && workDir.isDirectory());
-    
-    // <tests src dir> for topics/qrels files:
-    //  src/test/org/apache/lucene/benchmark/quality
-    File srcTestDir = new File(new File(new File(new File(new File(
-      new File(new File(workDir.getAbsoluteFile().getParentFile(),
-        "src"),"test"),"org"),"apache"),"lucene"),"benchmark"),"quality");
-    
+  public void testTrecTopicsReader() throws Exception {    
     // prepare topics
-    File topicsFile = new File(srcTestDir, "trecTopics.txt");
-    assertTrue("Bad topicsFile: " + topicsFile, 
-        topicsFile.exists() && topicsFile.isFile());
+    InputStream topicsFile = getClass().getResourceAsStream("trecTopics.txt");
     TrecTopicsReader qReader = new TrecTopicsReader();
     QualityQuery qqs[] = qReader.readQueries(
-        new BufferedReader(new FileReader(topicsFile)));
+        new BufferedReader(new InputStreamReader(topicsFile, "UTF-8")));
     
     assertEquals(20, qqs.length);
     
@@ -193,12 +186,13 @@
         qq.getValue("narrative"));
   }
 
-  // use benchmark logic to create the full Reuters index
+  // use benchmark logic to create the mini Reuters index
   private void createReutersIndex() throws Exception {
     // 1. alg definition
     String algLines[] = {
         "# ----- properties ",
-        "content.source="+ReutersContentSource.class.getName(),
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters578LinesFile(),
         "content.source.log.step=2500",
         "doc.term.vector=false",
         "content.source.forever=false",
@@ -215,4 +209,9 @@
     // 2. execute the algorithm  (required in every "logic" test)
     TestPerfTasksLogic.execBenchmark(algLines);
   }
+  
+  private static String getReuters578LinesFile() {
+    return System.getProperty("lucene.common.dir").replace('\\','/') +
+      "/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/reuters.578.lines.txt.bz2";
+  }  
 }

Added: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/reuters.578.lines.txt.bz2
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/reuters.578.lines.txt.bz2?rev=912333&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/reuters.578.lines.txt.bz2
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream