You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by do...@apache.org on 2007/03/16 02:16:56 UTC

svn commit: r518822 - in /lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark: byTask/feeds/ReutersDocMaker.java byTask/feeds/TrecDocMaker.java standard/StandardBenchmarker.java utils/ExtractReuters.java

Author: doronc
Date: Thu Mar 15 18:16:56 2007
New Revision: 518822

URL: http://svn.apache.org/viewvc?view=rev&rev=518822
Log:
LUCENE-829: close done readers in contrib/benchmark.

Modified:
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/standard/StandardBenchmarker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java?view=diff&rev=518822&r1=518821&r2=518822
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java Thu Mar 15 18:16:56 2007
@@ -79,6 +79,7 @@
     while ((line = reader.readLine()) != null) {
       bodyBuf.append(line).append(' ');
     }
+    reader.close();
     
     addBytes(f.length());
 

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java?view=diff&rev=518822&r1=518821&r2=518822
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java Thu Mar 15 18:16:56 2007
@@ -26,8 +26,10 @@
 import java.io.Reader;
 import java.io.StringReader;
 import java.text.DateFormat;
+import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
+import java.util.Date;
 import java.util.Locale;
 import java.util.Properties;
 import java.util.zip.GZIPInputStream;
@@ -159,7 +161,8 @@
     read("</DOCHDR>",null,false,false); 
     // 6. collect until end of doc
     sb = read("</DOC>",null,false,true);
-    // this is the next document, so parse it  
+    // this is the next document, so parse it 
+    // TODO use a more robust html parser (current one aborts parsing quite easily). 
     HTMLParser p = new HTMLParser(new StringReader(sb.toString()));
     // title
     String title = p.getTitle();
@@ -175,11 +178,18 @@
         bodyBuf.append(c,0,n);
       }
     }
+    r.close();
     addBytes(bodyBuf.length());
     
     DocData dd = new DocData();
-    
-    dd.date = dateFormat.parse(dateStr.trim());
+
+    try {
+      dd.date = dateFormat.parse(dateStr.trim());
+    } catch (ParseException e) {
+      // do not fail test just because a date could not be parsed
+      System.out.println("ignoring date parse exception (assigning 'now') for: "+dateStr);
+      dd.date = new Date(); // now 
+    }
     dd.name = name;
     dd.title = title;
     dd.body = bodyBuf.toString();

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/standard/StandardBenchmarker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/standard/StandardBenchmarker.java?view=diff&rev=518822&r1=518821&r2=518822
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/standard/StandardBenchmarker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/standard/StandardBenchmarker.java Thu Mar 15 18:16:56 2007
@@ -275,6 +275,8 @@
         {
             body.append(line).append(' ');
         }
+        reader.close();
+        
         Date date = format.parse(dateStr.trim());
 
         doc.add(new Field("date", DateTools.dateToString(date, DateTools.Resolution.SECOND), Field.Store.YES, Field.Index.UN_TOKENIZED));

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java?view=diff&rev=518822&r1=518821&r2=518822
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java Thu Mar 15 18:16:56 2007
@@ -135,6 +135,7 @@
                     buffer.setLength(0);
                 }
             }
+            reader.close();
         }
 
         catch (