You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2008/02/06 13:06:45 UTC
svn commit: r618975 - in /lucene/nutch/trunk: ./ lib/
src/java/org/apache/nutch/indexer/ src/java/org/apache/nutch/searcher/
src/plugin/lib-lucene-analyzers/ src/plugin/lib-lucene-analyzers/lib/
src/plugin/summary-lucene/ src/plugin/summary-lucene/lib/
Author: ab
Date: Wed Feb 6 04:06:34 2008
New Revision: 618975
URL: http://svn.apache.org/viewvc?rev=618975&view=rev
Log:
NUTCH-604 Upgrade to Lucene 2.3.0.
Added:
lucene/nutch/trunk/lib/lucene-core-2.3.0.jar (with props)
lucene/nutch/trunk/lib/lucene-misc-2.3.0.jar (with props)
lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.3.0.jar (with props)
lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.3.0.jar (with props)
Removed:
lucene/nutch/trunk/lib/lucene-core-2.2.0.jar
lucene/nutch/trunk/lib/lucene-misc-2.2.0.jar
lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.2.0.jar
lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.2.0.jar
Modified:
lucene/nutch/trunk/CHANGES.txt
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java
lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml
lucene/nutch/trunk/src/plugin/summary-lucene/plugin.xml
Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=618975&r1=618974&r2=618975&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Wed Feb 6 04:06:34 2008
@@ -195,6 +195,8 @@
68. NUTCH-587 - Upgrade to Hadoop 0.15.3 (kubes)
+69. NUTCH-604 - Upgrade to Lucene 2.3.0 (ab)
+
Release 0.9 - 2007-04-02
1. Changed log4j confiquration to log to stdout on commandline
Added: lucene/nutch/trunk/lib/lucene-core-2.3.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/lucene-core-2.3.0.jar?rev=618975&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/lib/lucene-core-2.3.0.jar
------------------------------------------------------------------------------
svn:executable = *
Propchange: lucene/nutch/trunk/lib/lucene-core-2.3.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/nutch/trunk/lib/lucene-misc-2.3.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/lucene-misc-2.3.0.jar?rev=618975&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/lib/lucene-misc-2.3.0.jar
------------------------------------------------------------------------------
svn:executable = *
Propchange: lucene/nutch/trunk/lib/lucene-misc-2.3.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java?rev=618975&r1=618974&r2=618975&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java Wed Feb 6 04:06:34 2008
@@ -63,13 +63,17 @@
private static final String TEMP_FILE = "temp";
private final RAMDirectory tempDir = new RAMDirectory();
- private final RAMOutputStream out =
- (RAMOutputStream)tempDir.createOutput(TEMP_FILE);
+ private RAMOutputStream out;
private IndexInput in;
public SortedTermPositions(TermPositions original, int[] oldToNew) {
this.original = original;
this.oldToNew = oldToNew;
+ try {
+ out = (RAMOutputStream)tempDir.createOutput(TEMP_FILE);
+ } catch (IOException ioe) {
+ LOG.warn("Error creating temporary output: " + StringUtils.stringifyException(ioe));
+ }
}
public void seek(Term term) throws IOException {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java?rev=618975&r1=618974&r2=618975&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java Wed Feb 6 04:06:34 2008
@@ -19,29 +19,27 @@
import java.io.File;
import java.io.IOException;
-import java.net.URI;
-
-import java.util.ArrayList;
-import java.util.Enumeration;
-
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
+import java.util.List;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
-
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.FieldCache;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
-import org.apache.hadoop.fs.*;
-import org.apache.hadoop.io.*;
-import org.apache.hadoop.conf.*;
-import org.apache.nutch.indexer.*;
+import org.apache.lucene.search.FieldDoc;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.nutch.indexer.FsDirectory;
+import org.apache.nutch.indexer.NutchSimilarity;
/** Implements {@link Searcher} and {@link HitDetailer} for either a single
* merged index, or a set of indexes. */
@@ -85,7 +83,7 @@
if ("file".equals(this.fs.getUri().getScheme())) {
Path qualified = file.makeQualified(FileSystem.getLocal(conf));
File fsLocal = new File(qualified.toUri());
- return FSDirectory.getDirectory(fsLocal.getAbsolutePath(), false);
+ return FSDirectory.getDirectory(fsLocal.getAbsolutePath());
} else {
return new FsDirectory(this.fs, file, false, this.conf);
}
@@ -109,20 +107,19 @@
}
public HitDetails getDetails(Hit hit) throws IOException {
- ArrayList fields = new ArrayList();
- ArrayList values = new ArrayList();
Document doc = luceneSearcher.doc(hit.getIndexDocNo());
- Enumeration e = doc.fields();
- while (e.hasMoreElements()) {
- Field field = (Field)e.nextElement();
- fields.add(field.name());
- values.add(field.stringValue());
+ List docFields = doc.getFields();
+ String[] fields = new String[docFields.size()];
+ String[] values = new String[docFields.size()];
+ for (int i = 0; i < docFields.size(); i++) {
+ Field field = (Field)docFields.get(i);
+ fields[i] = field.name();
+ values[i] = field.stringValue();
}
- return new HitDetails((String[])fields.toArray(new String[fields.size()]),
- (String[])values.toArray(new String[values.size()]));
+ return new HitDetails(fields, values);
}
public HitDetails[] getDetails(Hit[] hits) throws IOException {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java?rev=618975&r1=618974&r2=618975&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java Wed Feb 6 04:06:34 2008
@@ -125,13 +125,15 @@
}
super.collect(doc, score);
}
- } private static class LimitExceeded extends RuntimeException {
+ }
+
+ private static class LimitExceeded extends RuntimeException {
private int maxDoc;
public LimitExceeded(int maxDoc) { this.maxDoc = maxDoc; }
}
- private LinkedHashMap cache; // an LRU cache of QueryFilter
-
+ private LinkedHashMap<BooleanQuery, Filter> cache; // an LRU cache of QueryFilter
+
private float threshold;
private int searcherMaxHits;
@@ -154,7 +156,7 @@
this.threshold = conf.getFloat("searcher.filter.cache.threshold",
0.05f);
this.searcherMaxHits = conf.getInt("searcher.max.hits", -1);
- this.cache = new LinkedHashMap(cacheSize, 0.75f, true) {
+ this.cache = new LinkedHashMap<BooleanQuery, Filter>(cacheSize, 0.75f, true) {
protected boolean removeEldestEntry(Map.Entry eldest) {
return size() > cacheSize; // limit size of cache
}
@@ -174,7 +176,7 @@
BooleanQuery query = new BooleanQuery();
BooleanQuery cacheQuery = new BooleanQuery();
BooleanQuery filterQuery = new BooleanQuery();
- ArrayList filters = new ArrayList();
+ ArrayList<Filter> filters = new ArrayList<Filter>();
BooleanClause[] clauses = original.getClauses();
for (int i = 0; i < clauses.length; i++) {
@@ -214,12 +216,12 @@
Filter filter = null;
if (cacheQuery.getClauses().length != 0) {
synchronized (cache) { // check cache
- filter = (Filter)cache.get(cacheQuery);
+ filter = cache.get(cacheQuery);
}
if (filter == null) { // miss
if (filterQuery.getClauses().length != 0) // add filterQuery to filters
- filters.add(new QueryFilter(filterQuery));
+ filters.add(new CachingWrapperFilter(new QueryWrapperFilter(filterQuery)));
if (filters.size() == 1) { // convert filters to filter
filter = (Filter)filters.get(0);
@@ -228,7 +230,7 @@
(new Filter[filters.size()]),
ChainedFilter.AND);
}
- if (!(filter instanceof QueryFilter)) // make sure bits are cached
+ if (!(filter instanceof CachingWrapperFilter)) // make sure bits are cached
filter = new CachingWrapperFilter(filter);
synchronized (cache) {
Added: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.3.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.3.0.jar?rev=618975&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.3.0.jar
------------------------------------------------------------------------------
svn:executable = *
Propchange: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.3.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml?rev=618975&r1=618974&r2=618975&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml Wed Feb 6 04:06:34 2008
@@ -25,11 +25,11 @@
<plugin
id="lib-lucene-analyzers"
name="Lucene Analysers"
- version="2.2.0"
+ version="2.3.0"
provider-name="org.apache.lucene">
<runtime>
- <library name="lucene-analyzers-2.2.0.jar">
+ <library name="lucene-analyzers-2.3.0.jar">
<export name="*"/>
</library>
</runtime>
Added: lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.3.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.3.0.jar?rev=618975&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.3.0.jar
------------------------------------------------------------------------------
svn:executable = *
Propchange: lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.3.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: lucene/nutch/trunk/src/plugin/summary-lucene/plugin.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/summary-lucene/plugin.xml?rev=618975&r1=618974&r2=618975&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/summary-lucene/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/summary-lucene/plugin.xml Wed Feb 6 04:06:34 2008
@@ -25,7 +25,7 @@
<library name="summary-lucene.jar">
<export name="*"/>
</library>
- <library name="lucene-highlighter-2.2.0.jar"/>
+ <library name="lucene-highlighter-2.3.0.jar"/>
</runtime>
<requires>