You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by ma...@apache.org on 2009/07/20 14:19:06 UTC

svn commit: r795792 - in /lucene/java/trunk/contrib/benchmark: CHANGES.txt src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java

Author: markrmiller
Date: Mon Jul 20 12:19:06 2009
New Revision: 795792

URL: http://svn.apache.org/viewvc?rev=795792&view=rev
Log:
LUCENE-1755: Fix WriteLineDocTask to output a document if it contains either a title or body (or both). 

Modified:
    lucene/java/trunk/contrib/benchmark/CHANGES.txt
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java
    lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java

Modified: lucene/java/trunk/contrib/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/CHANGES.txt?rev=795792&r1=795791&r2=795792&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/benchmark/CHANGES.txt Mon Jul 20 12:19:06 2009
@@ -4,6 +4,10 @@
 
 $Id:$
 
+7/20/2009
+  LUCENE-1755: Fix WriteLineDocTask to output a document if it contains either 
+  a title or body (or both).  (Shai Erera via Mark Miller)
+
 7/14/2009
   LUCENE-1725: Fix the example Sort algorithm - auto is now deprecated and no longer works
   with Benchmark. Benchmark will now throw an exception if you specify sort fields without

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java?rev=795792&r1=795791&r2=795792&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java Mon Jul 20 12:19:06 2009
@@ -97,11 +97,12 @@
     Document doc = docSize > 0 ? docMaker.makeDocument(docSize) : docMaker.makeDocument();
 
     Field f = doc.getField(DocMaker.BODY_FIELD);
-    String body = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : null;
+    String body = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : "";
     
-    if (body != null) {
-      f = doc.getField(DocMaker.TITLE_FIELD);
-      String title = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : "";
+    f = doc.getField(DocMaker.TITLE_FIELD);
+    String title = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : "";
+    
+    if (body.length() > 0 || title.length() > 0) {
       
       f = doc.getField(DocMaker.DATE_FIELD);
       String date = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : "";

Modified: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java?rev=795792&r1=795791&r2=795792&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java Mon Jul 20 12:19:06 2009
@@ -39,7 +39,7 @@
 
   // class has to be public so that Class.forName.newInstance() will work
   public static final class WriteLineDocMaker extends DocMaker {
-
+  
     public Document makeDocument() throws Exception {
       Document doc = new Document();
       doc.add(new Field(BODY_FIELD, "body", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
@@ -52,7 +52,7 @@
   
   // class has to be public so that Class.forName.newInstance() will work
   public static final class NewLinesDocMaker extends DocMaker {
-    
+  
     public Document makeDocument() throws Exception {
       Document doc = new Document();
       doc.add(new Field(BODY_FIELD, "body\r\ntext\ttwo", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
@@ -63,6 +63,35 @@
     
   }
   
+  // class has to be public so that Class.forName.newInstance() will work
+  public static final class NoBodyDocMaker extends DocMaker {
+    public Document makeDocument() throws Exception {
+      Document doc = new Document();
+      doc.add(new Field(TITLE_FIELD, "title", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
+      doc.add(new Field(DATE_FIELD, "date", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
+      return doc;
+    }
+  }
+  
+  // class has to be public so that Class.forName.newInstance() will work
+  public static final class NoTitleDocMaker extends DocMaker {
+    public Document makeDocument() throws Exception {
+      Document doc = new Document();
+      doc.add(new Field(BODY_FIELD, "body", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
+      doc.add(new Field(DATE_FIELD, "date", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
+      return doc;
+    }
+  }
+  
+  // class has to be public so that Class.forName.newInstance() will work
+  public static final class JustDateDocMaker extends DocMaker {
+    public Document makeDocument() throws Exception {
+      Document doc = new Document();
+      doc.add(new Field(DATE_FIELD, "date", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
+      return doc;
+    }
+  }
+  
   private static final CompressorStreamFactory csFactory = new CompressorStreamFactory();
 
   private PerfRunData createPerfRunData(File file, boolean setBZCompress,
@@ -90,10 +119,13 @@
       String line = br.readLine();
       assertNotNull(line);
       String[] parts = line.split(Character.toString(WriteLineDocTask.SEP));
-      assertEquals(3, parts.length);
+      int numExpParts = expBody == null ? 2 : 3;
+      assertEquals(numExpParts, parts.length);
       assertEquals(expTitle, parts[0]);
       assertEquals(expDate, parts[1]);
-      assertEquals(expBody, parts[2]);
+      if (expBody != null) {
+        assertEquals(expBody, parts[2]);
+      }
       assertNull(br.readLine());
     } finally {
       br.close();
@@ -149,4 +181,44 @@
     
     doReadTest(file, false, "title text", "date text", "body text two");
   }
+  
+  public void testEmptyBody() throws Exception {
+    // WriteLineDocTask used to throw away documents w/ no BODY element, even if
+    // they had a TITLE element (LUCENE-1755). It should only throw away
+    // documents that have neither a BODY nor a TITLE.
+    File file = new File(getWorkDir(), "one-line");
+    PerfRunData runData = createPerfRunData(file, false, null, NoBodyDocMaker.class.getName());
+    WriteLineDocTask wldt = new WriteLineDocTask(runData);
+    wldt.doLogic();
+    wldt.close();
+    
+    doReadTest(file, false, "title", "date", null);
+  }
+  
+  public void testEmptyTitle() throws Exception {
+    File file = new File(getWorkDir(), "one-line");
+    PerfRunData runData = createPerfRunData(file, false, null, NoTitleDocMaker.class.getName());
+    WriteLineDocTask wldt = new WriteLineDocTask(runData);
+    wldt.doLogic();
+    wldt.close();
+    
+    doReadTest(file, false, "", "date", "body");
+  }
+  
+  public void testJustDate() throws Exception {
+    File file = new File(getWorkDir(), "one-line");
+    PerfRunData runData = createPerfRunData(file, false, null, JustDateDocMaker.class.getName());
+    WriteLineDocTask wldt = new WriteLineDocTask(runData);
+    wldt.doLogic();
+    wldt.close();
+    
+    BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));
+    try {
+      String line = br.readLine();
+      assertNull(line);
+    } finally {
+      br.close();
+    }
+  }
+  
 }