You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by rm...@apache.org on 2010/01/14 20:33:51 UTC

svn commit: r899369 - in /lucene/java/trunk/contrib/benchmark: ./ src/java/org/apache/lucene/benchmark/quality/trec/ src/test/org/apache/lucene/benchmark/quality/

Author: rmuir
Date: Thu Jan 14 19:33:48 2010
New Revision: 899369

URL: http://svn.apache.org/viewvc?rev=899369&view=rev
Log:
LUCENE-2210: fix TrecTopicsReader for descriptions and narratives

Modified:
    lucene/java/trunk/contrib/benchmark/CHANGES.txt
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java
    lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
    lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt

Modified: lucene/java/trunk/contrib/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/CHANGES.txt?rev=899369&r1=899368&r2=899369&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/benchmark/CHANGES.txt Thu Jan 14 19:33:48 2010
@@ -4,6 +4,10 @@
 
 $Id:$
 
+1/14/2010
+  LUCENE-2210: TrecTopicsReader now properly reads descriptions and
+  narratives from trec topics files.  (Robert Muir)
+
 1/11/2010
   LUCENE-2181: Add a benchmark for collation. This adds NewLocaleTask,
   which sets a Locale in the run data for collation to use, and can be

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java?rev=899369&r1=899368&r2=899369&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java Thu Jan 14 19:33:48 2010
@@ -76,16 +76,31 @@
         k = sb.indexOf(">");
         String title = sb.substring(k+1).trim();
         // description
-        sb = read(reader,"<desc>",null,false,false);
-        sb = read(reader,"<narr>",null,false,true);
-        String descripion = sb.toString().trim();
+        read(reader,"<desc>",null,false,false);
+        sb.setLength(0);
+        String line = null;
+        while ((line = reader.readLine()) != null) {
+          if (line.startsWith("<narr>"))
+            break;
+          if (sb.length() > 0) sb.append(' ');
+          sb.append(line);
+        }
+        String description = sb.toString().trim();
+        // narrative
+        sb.setLength(0);
+        while ((line = reader.readLine()) != null) {
+          if (line.startsWith("</top>"))
+            break;
+          if (sb.length() > 0) sb.append(' ');
+          sb.append(line);
+        }
+        String narrative = sb.toString().trim();
         // we got a topic!
         fields.put("title",title);
-        fields.put("description",descripion);
+        fields.put("description",description);
+        fields.put("narrative", narrative);
         QualityQuery topic = new QualityQuery(id,fields);
         res.add(topic);
-        // skip narrative, get to end of doc
-        read(reader,"</top>",null,false,false);
       }
     } finally {
       reader.close();

Modified: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java?rev=899369&r1=899368&r2=899369&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java Thu Jan 14 19:33:48 2010
@@ -149,6 +149,49 @@
 
     
   }
+  
+  public void testTrecTopicsReader() throws Exception {
+    File workDir = new File(System.getProperty("benchmark.work.dir","work"));
+    assertTrue("Bad workDir: " + workDir, 
+        workDir.exists() && workDir.isDirectory());
+    
+    // <tests src dir> for topics/qrels files:
+    //  src/test/org/apache/lucene/benchmark/quality
+    File srcTestDir = new File(new File(new File(new File(new File(
+      new File(new File(workDir.getAbsoluteFile().getParentFile(),
+        "src"),"test"),"org"),"apache"),"lucene"),"benchmark"),"quality");
+    
+    // prepare topics
+    File topicsFile = new File(srcTestDir, "trecTopics.txt");
+    assertTrue("Bad topicsFile: " + topicsFile, 
+        topicsFile.exists() && topicsFile.isFile());
+    TrecTopicsReader qReader = new TrecTopicsReader();
+    QualityQuery qqs[] = qReader.readQueries(
+        new BufferedReader(new FileReader(topicsFile)));
+    
+    assertEquals(20, qqs.length);
+    
+    QualityQuery qq = qqs[0];
+    assertEquals("statement months  total 1987", qq.getValue("title"));
+    assertEquals("Topic 0 Description Line 1 Topic 0 Description Line 2", 
+        qq.getValue("description"));
+    assertEquals("Topic 0 Narrative Line 1 Topic 0 Narrative Line 2", 
+        qq.getValue("narrative"));
+    
+    qq = qqs[1];
+    assertEquals("agreed 15  against five", qq.getValue("title"));
+    assertEquals("Topic 1 Description Line 1 Topic 1 Description Line 2", 
+        qq.getValue("description"));
+    assertEquals("Topic 1 Narrative Line 1 Topic 1 Narrative Line 2", 
+        qq.getValue("narrative"));
+    
+    qq = qqs[19];
+    assertEquals("20 while  common week", qq.getValue("title"));
+    assertEquals("Topic 19 Description Line 1 Topic 19 Description Line 2", 
+        qq.getValue("description"));
+    assertEquals("Topic 19 Narrative Line 1 Topic 19 Narrative Line 2", 
+        qq.getValue("narrative"));
+  }
 
   // use benchmark logic to create the full Reuters index
   private void createReutersIndex() throws Exception {

Modified: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt?rev=899369&r1=899368&r2=899369&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt (original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt Thu Jan 14 19:33:48 2010
@@ -26,10 +26,12 @@
 <title> statement months  total 1987
 
 <desc> Description:
-
+Topic 0 Description Line 1
+Topic 0 Description Line 2
 
 <narr> Narrative:
-
+Topic 0 Narrative Line 1
+Topic 0 Narrative Line 2
 
 </top>
 
@@ -39,10 +41,12 @@
 <title> agreed 15  against five
 
 <desc> Description:
-
+Topic 1 Description Line 1
+Topic 1 Description Line 2
 
 <narr> Narrative:
-
+Topic 1 Narrative Line 1
+Topic 1 Narrative Line 2
 
 </top>
 
@@ -273,9 +277,11 @@
 <title> 20 while  common week
 
 <desc> Description:
-
+Topic 19 Description Line 1
+Topic 19 Description Line 2
 
 <narr> Narrative:
-
+Topic 19 Narrative Line 1
+Topic 19 Narrative Line 2
 
 </top>