You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by rm...@apache.org on 2010/01/14 20:33:51 UTC
svn commit: r899369 - in /lucene/java/trunk/contrib/benchmark: ./
src/java/org/apache/lucene/benchmark/quality/trec/
src/test/org/apache/lucene/benchmark/quality/
Author: rmuir
Date: Thu Jan 14 19:33:48 2010
New Revision: 899369
URL: http://svn.apache.org/viewvc?rev=899369&view=rev
Log:
LUCENE-2210: fix TrecTopicsReader for descriptions and narratives
Modified:
lucene/java/trunk/contrib/benchmark/CHANGES.txt
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java
lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt
Modified: lucene/java/trunk/contrib/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/CHANGES.txt?rev=899369&r1=899368&r2=899369&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/benchmark/CHANGES.txt Thu Jan 14 19:33:48 2010
@@ -4,6 +4,10 @@
$Id:$
+1/14/2010
+ LUCENE-2210: TrecTopicsReader now properly reads descriptions and
+ narratives from trec topics files. (Robert Muir)
+
1/11/2010
LUCENE-2181: Add a benchmark for collation. This adds NewLocaleTask,
which sets a Locale in the run data for collation to use, and can be
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java?rev=899369&r1=899368&r2=899369&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java Thu Jan 14 19:33:48 2010
@@ -76,16 +76,31 @@
k = sb.indexOf(">");
String title = sb.substring(k+1).trim();
// description
- sb = read(reader,"<desc>",null,false,false);
- sb = read(reader,"<narr>",null,false,true);
- String descripion = sb.toString().trim();
+ read(reader,"<desc>",null,false,false);
+ sb.setLength(0);
+ String line = null;
+ while ((line = reader.readLine()) != null) {
+ if (line.startsWith("<narr>"))
+ break;
+ if (sb.length() > 0) sb.append(' ');
+ sb.append(line);
+ }
+ String description = sb.toString().trim();
+ // narrative
+ sb.setLength(0);
+ while ((line = reader.readLine()) != null) {
+ if (line.startsWith("</top>"))
+ break;
+ if (sb.length() > 0) sb.append(' ');
+ sb.append(line);
+ }
+ String narrative = sb.toString().trim();
// we got a topic!
fields.put("title",title);
- fields.put("description",descripion);
+ fields.put("description",description);
+ fields.put("narrative", narrative);
QualityQuery topic = new QualityQuery(id,fields);
res.add(topic);
- // skip narrative, get to end of doc
- read(reader,"</top>",null,false,false);
}
} finally {
reader.close();
Modified: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java?rev=899369&r1=899368&r2=899369&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java Thu Jan 14 19:33:48 2010
@@ -149,6 +149,49 @@
}
+
+ public void testTrecTopicsReader() throws Exception {
+ File workDir = new File(System.getProperty("benchmark.work.dir","work"));
+ assertTrue("Bad workDir: " + workDir,
+ workDir.exists() && workDir.isDirectory());
+
+ // <tests src dir> for topics/qrels files:
+ // src/test/org/apache/lucene/benchmark/quality
+ File srcTestDir = new File(new File(new File(new File(new File(
+ new File(new File(workDir.getAbsoluteFile().getParentFile(),
+ "src"),"test"),"org"),"apache"),"lucene"),"benchmark"),"quality");
+
+ // prepare topics
+ File topicsFile = new File(srcTestDir, "trecTopics.txt");
+ assertTrue("Bad topicsFile: " + topicsFile,
+ topicsFile.exists() && topicsFile.isFile());
+ TrecTopicsReader qReader = new TrecTopicsReader();
+ QualityQuery qqs[] = qReader.readQueries(
+ new BufferedReader(new FileReader(topicsFile)));
+
+ assertEquals(20, qqs.length);
+
+ QualityQuery qq = qqs[0];
+ assertEquals("statement months total 1987", qq.getValue("title"));
+ assertEquals("Topic 0 Description Line 1 Topic 0 Description Line 2",
+ qq.getValue("description"));
+ assertEquals("Topic 0 Narrative Line 1 Topic 0 Narrative Line 2",
+ qq.getValue("narrative"));
+
+ qq = qqs[1];
+ assertEquals("agreed 15 against five", qq.getValue("title"));
+ assertEquals("Topic 1 Description Line 1 Topic 1 Description Line 2",
+ qq.getValue("description"));
+ assertEquals("Topic 1 Narrative Line 1 Topic 1 Narrative Line 2",
+ qq.getValue("narrative"));
+
+ qq = qqs[19];
+ assertEquals("20 while common week", qq.getValue("title"));
+ assertEquals("Topic 19 Description Line 1 Topic 19 Description Line 2",
+ qq.getValue("description"));
+ assertEquals("Topic 19 Narrative Line 1 Topic 19 Narrative Line 2",
+ qq.getValue("narrative"));
+ }
// use benchmark logic to create the full Reuters index
private void createReutersIndex() throws Exception {
Modified: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt?rev=899369&r1=899368&r2=899369&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt (original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt Thu Jan 14 19:33:48 2010
@@ -26,10 +26,12 @@
<title> statement months total 1987
<desc> Description:
-
+Topic 0 Description Line 1
+Topic 0 Description Line 2
<narr> Narrative:
-
+Topic 0 Narrative Line 1
+Topic 0 Narrative Line 2
</top>
@@ -39,10 +41,12 @@
<title> agreed 15 against five
<desc> Description:
-
+Topic 1 Description Line 1
+Topic 1 Description Line 2
<narr> Narrative:
-
+Topic 1 Narrative Line 1
+Topic 1 Narrative Line 2
</top>
@@ -273,9 +277,11 @@
<title> 20 while common week
<desc> Description:
-
+Topic 19 Description Line 1
+Topic 19 Description Line 2
<narr> Narrative:
-
+Topic 19 Narrative Line 1
+Topic 19 Narrative Line 2
</top>