You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2008/02/06 13:06:45 UTC

svn commit: r618975 - in /lucene/nutch/trunk: ./ lib/ src/java/org/apache/nutch/indexer/ src/java/org/apache/nutch/searcher/ src/plugin/lib-lucene-analyzers/ src/plugin/lib-lucene-analyzers/lib/ src/plugin/summary-lucene/ src/plugin/summary-lucene/lib/

Author: ab
Date: Wed Feb  6 04:06:34 2008
New Revision: 618975

URL: http://svn.apache.org/viewvc?rev=618975&view=rev
Log:
NUTCH-604 Upgrade to Lucene 2.3.0.

Added:
    lucene/nutch/trunk/lib/lucene-core-2.3.0.jar   (with props)
    lucene/nutch/trunk/lib/lucene-misc-2.3.0.jar   (with props)
    lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.3.0.jar   (with props)
    lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.3.0.jar   (with props)
Removed:
    lucene/nutch/trunk/lib/lucene-core-2.2.0.jar
    lucene/nutch/trunk/lib/lucene-misc-2.2.0.jar
    lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.2.0.jar
    lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.2.0.jar
Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java
    lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml
    lucene/nutch/trunk/src/plugin/summary-lucene/plugin.xml

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=618975&r1=618974&r2=618975&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Wed Feb  6 04:06:34 2008
@@ -195,6 +195,8 @@
 
 68. NUTCH-587 - Upgrade to Hadoop 0.15.3 (kubes)
 
+69. NUTCH-604 - Upgrade to Lucene 2.3.0 (ab)
+
 Release 0.9 - 2007-04-02
 
  1. Changed log4j configuration to log to stdout on commandline

Added: lucene/nutch/trunk/lib/lucene-core-2.3.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/lucene-core-2.3.0.jar?rev=618975&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/lib/lucene-core-2.3.0.jar
------------------------------------------------------------------------------
    svn:executable = *

Propchange: lucene/nutch/trunk/lib/lucene-core-2.3.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/lib/lucene-misc-2.3.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/lucene-misc-2.3.0.jar?rev=618975&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/lib/lucene-misc-2.3.0.jar
------------------------------------------------------------------------------
    svn:executable = *

Propchange: lucene/nutch/trunk/lib/lucene-misc-2.3.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java?rev=618975&r1=618974&r2=618975&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java Wed Feb  6 04:06:34 2008
@@ -63,13 +63,17 @@
 
     private static final String TEMP_FILE = "temp";
     private final RAMDirectory tempDir = new RAMDirectory();
-    private final RAMOutputStream out =
-      (RAMOutputStream)tempDir.createOutput(TEMP_FILE);
+    private RAMOutputStream out;
     private IndexInput in;
 
     public SortedTermPositions(TermPositions original, int[] oldToNew) {
       this.original = original;
       this.oldToNew = oldToNew;
+      try {
+        out = (RAMOutputStream)tempDir.createOutput(TEMP_FILE);
+      } catch (IOException ioe) {
+        LOG.warn("Error creating temporary output: " + StringUtils.stringifyException(ioe));
+      }
     }
 
     public void seek(Term term) throws IOException {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java?rev=618975&r1=618974&r2=618975&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java Wed Feb  6 04:06:34 2008
@@ -19,29 +19,27 @@
 
 import java.io.File;
 import java.io.IOException;
-import java.net.URI;
-
-import java.util.ArrayList;
-import java.util.Enumeration;
-
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
+import java.util.List;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiReader;
-
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.FieldCache;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
-import org.apache.hadoop.fs.*;
-import org.apache.hadoop.io.*;
-import org.apache.hadoop.conf.*;
-import org.apache.nutch.indexer.*;
+import org.apache.lucene.search.FieldDoc;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.nutch.indexer.FsDirectory;
+import org.apache.nutch.indexer.NutchSimilarity;
 
 /** Implements {@link Searcher} and {@link HitDetailer} for either a single
  * merged index, or a set of indexes. */
@@ -85,7 +83,7 @@
     if ("file".equals(this.fs.getUri().getScheme())) {
       Path qualified = file.makeQualified(FileSystem.getLocal(conf));
       File fsLocal = new File(qualified.toUri());
-      return FSDirectory.getDirectory(fsLocal.getAbsolutePath(), false);
+      return FSDirectory.getDirectory(fsLocal.getAbsolutePath());
     } else {
       return new FsDirectory(this.fs, file, false, this.conf);
     }
@@ -109,20 +107,19 @@
   }
 
   public HitDetails getDetails(Hit hit) throws IOException {
-    ArrayList fields = new ArrayList();
-    ArrayList values = new ArrayList();
 
     Document doc = luceneSearcher.doc(hit.getIndexDocNo());
 
-    Enumeration e = doc.fields();
-    while (e.hasMoreElements()) {
-      Field field = (Field)e.nextElement();
-      fields.add(field.name());
-      values.add(field.stringValue());
+    List docFields = doc.getFields();
+    String[] fields = new String[docFields.size()];
+    String[] values = new String[docFields.size()];
+    for (int i = 0; i < docFields.size(); i++) {
+      Field field = (Field)docFields.get(i);
+      fields[i] = field.name();
+      values[i] = field.stringValue();
     }
 
-    return new HitDetails((String[])fields.toArray(new String[fields.size()]),
-                          (String[])values.toArray(new String[values.size()]));
+    return new HitDetails(fields, values);
   }
 
   public HitDetails[] getDetails(Hit[] hits) throws IOException {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java?rev=618975&r1=618974&r2=618975&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java Wed Feb  6 04:06:34 2008
@@ -125,13 +125,15 @@
       }
       super.collect(doc, score);
     }
-  }  private static class LimitExceeded extends RuntimeException {
+  }
+  
+  private static class LimitExceeded extends RuntimeException {
     private int maxDoc;
     public LimitExceeded(int maxDoc) { this.maxDoc = maxDoc; }    
   }
   
-  private LinkedHashMap cache;                   // an LRU cache of QueryFilter
-
+  private LinkedHashMap<BooleanQuery, Filter> cache;                   // an LRU cache of QueryFilter
+  
   private float threshold;
 
   private int searcherMaxHits;
@@ -154,7 +156,7 @@
     this.threshold = conf.getFloat("searcher.filter.cache.threshold",
         0.05f);
     this.searcherMaxHits = conf.getInt("searcher.max.hits", -1);
-    this.cache = new LinkedHashMap(cacheSize, 0.75f, true) {
+    this.cache = new LinkedHashMap<BooleanQuery, Filter>(cacheSize, 0.75f, true) {
       protected boolean removeEldestEntry(Map.Entry eldest) {
         return size() > cacheSize; // limit size of cache
       }
@@ -174,7 +176,7 @@
     BooleanQuery query = new BooleanQuery();
     BooleanQuery cacheQuery = new BooleanQuery();
     BooleanQuery filterQuery = new BooleanQuery();
-    ArrayList filters = new ArrayList();
+    ArrayList<Filter> filters = new ArrayList<Filter>();
 
     BooleanClause[] clauses = original.getClauses();
     for (int i = 0; i < clauses.length; i++) {
@@ -214,12 +216,12 @@
     Filter filter = null;
     if (cacheQuery.getClauses().length != 0) {
       synchronized (cache) {                      // check cache
-        filter = (Filter)cache.get(cacheQuery);
+        filter = cache.get(cacheQuery);
       }
       if (filter == null) {                       // miss
 
         if (filterQuery.getClauses().length != 0) // add filterQuery to filters
-          filters.add(new QueryFilter(filterQuery));
+          filters.add(new CachingWrapperFilter(new QueryWrapperFilter(filterQuery)));
 
         if (filters.size() == 1) {                // convert filters to filter
           filter = (Filter)filters.get(0);
@@ -228,7 +230,7 @@
                                      (new Filter[filters.size()]),
                                      ChainedFilter.AND);
         }
-        if (!(filter instanceof QueryFilter))     // make sure bits are cached
+        if (!(filter instanceof CachingWrapperFilter))     // make sure bits are cached
           filter = new CachingWrapperFilter(filter);
         
         synchronized (cache) {

Added: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.3.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.3.0.jar?rev=618975&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.3.0.jar
------------------------------------------------------------------------------
    svn:executable = *

Propchange: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.3.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml?rev=618975&r1=618974&r2=618975&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml Wed Feb  6 04:06:34 2008
@@ -25,11 +25,11 @@
 <plugin
    id="lib-lucene-analyzers"
    name="Lucene Analysers"
-   version="2.2.0"
+   version="2.3.0"
    provider-name="org.apache.lucene">
 
    <runtime>
-     <library name="lucene-analyzers-2.2.0.jar">
+     <library name="lucene-analyzers-2.3.0.jar">
         <export name="*"/>
      </library>
    </runtime>

Added: lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.3.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.3.0.jar?rev=618975&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.3.0.jar
------------------------------------------------------------------------------
    svn:executable = *

Propchange: lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.3.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: lucene/nutch/trunk/src/plugin/summary-lucene/plugin.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/summary-lucene/plugin.xml?rev=618975&r1=618974&r2=618975&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/summary-lucene/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/summary-lucene/plugin.xml Wed Feb  6 04:06:34 2008
@@ -25,7 +25,7 @@
       <library name="summary-lucene.jar">
          <export name="*"/>
       </library>
-      <library name="lucene-highlighter-2.2.0.jar"/>
+      <library name="lucene-highlighter-2.3.0.jar"/>
    </runtime>
 
    <requires>