You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ku...@apache.org on 2008/12/02 15:41:10 UTC
svn commit: r722475 - in /lucene/nutch/trunk: ./ lib/
src/java/org/apache/nutch/indexer/ src/plugin/lib-lucene-analyzers/
src/plugin/lib-lucene-analyzers/lib/
Author: kubes
Date: Tue Dec 2 06:41:09 2008
New Revision: 722475
URL: http://svn.apache.org/viewvc?rev=722475&view=rev
Log:
NUTCH-662: Upgrade Nutch to use Lucene 2.4
Added:
lucene/nutch/trunk/lib/lucene-core-2.4.0.jar (with props)
lucene/nutch/trunk/lib/lucene-misc-2.4.0.jar (with props)
lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.4.0.jar (with props)
Removed:
lucene/nutch/trunk/lib/lucene-core-2.3.0.jar
lucene/nutch/trunk/lib/lucene-misc-2.3.0.jar
lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.3.0.jar
Modified:
lucene/nutch/trunk/CHANGES.txt
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/FsDirectory.java
lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml
Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=722475&r1=722474&r2=722475&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Tue Dec 2 06:41:09 2008
@@ -283,7 +283,9 @@
104. NUTCH-640 - confusing description "set it to Integer.MAX_VALUE".
(dogacan)
-
+
+105. NUTCH-662 - Upgrade Nutch to use Lucene 2.4. (kubes)
+
Release 0.9 - 2007-04-02
1. Changed log4j configuration to log to stdout on commandline
Added: lucene/nutch/trunk/lib/lucene-core-2.4.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/lucene-core-2.4.0.jar?rev=722475&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/lib/lucene-core-2.4.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/nutch/trunk/lib/lucene-misc-2.4.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/lucene-misc-2.4.0.jar?rev=722475&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/lib/lucene-misc-2.4.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java?rev=722475&r1=722474&r2=722475&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java Tue Dec 2 06:41:09 2008
@@ -145,10 +145,11 @@
/** Return each index as a split. */
public InputSplit[] getSplits(JobConf job, int numSplits)
throws IOException {
- Path[] files = listPaths(job);
+ FileStatus[] files = listStatus(job);
InputSplit[] splits = new InputSplit[files.length];
for (int i = 0; i < files.length; i++) {
- splits[i] = new FileSplit(files[i], 0, INDEX_LENGTH, (String[])null);
+ FileStatus cur = files[i];
+ splits[i] = new FileSplit(cur.getPath(), 0, INDEX_LENGTH, (String[])null);
}
return splits;
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/FsDirectory.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/FsDirectory.java?rev=722475&r1=722474&r2=722475&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/FsDirectory.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/FsDirectory.java Tue Dec 2 06:41:09 2008
@@ -18,6 +18,8 @@
package org.apache.nutch.indexer;
import java.io.*;
+import java.util.Random;
+
import org.apache.lucene.store.*;
import org.apache.nutch.util.HadoopFSUtil;
import org.apache.hadoop.fs.*;
@@ -207,30 +209,45 @@
private class DfsIndexOutput extends BufferedIndexOutput {
private FSDataOutputStream out;
+ private RandomAccessFile local;
+ private File localFile;
public DfsIndexOutput(Path path, int ioFileBufferSize) throws IOException {
+
+ // create a temporary local file and set it to delete on exit
+ String randStr = Integer.toString(new Random().nextInt(Integer.MAX_VALUE));
+ localFile = File.createTempFile("index_" + randStr, ".tmp");
+ localFile.deleteOnExit();
+ local = new RandomAccessFile(localFile, "rw");
+
out = fs.create(path);
}
public void flushBuffer(byte[] b, int offset, int size) throws IOException {
- out.write(b, offset, size);
+ local.write(b, offset, size);
}
public void close() throws IOException {
super.close();
+
+ // transfer to dfs from local
+ byte[] buffer = new byte[4096];
+ local.seek(0);
+ int read = -1;
+ while ((read = local.read(buffer)) != -1) {
+ out.write(buffer, 0, read);
+ }
out.close();
+ local.close();
}
public void seek(long pos) throws IOException {
- throw new UnsupportedOperationException();
+ super.seek(pos);
+ local.seek(pos);
}
public long length() throws IOException {
- return out.getPos();
- }
-
- protected void finalize() throws IOException {
- out.close(); // close the file
+ return local.length();
}
}
Added: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.4.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.4.0.jar?rev=722475&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.4.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml?rev=722475&r1=722474&r2=722475&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml Tue Dec 2 06:41:09 2008
@@ -25,11 +25,11 @@
<plugin
id="lib-lucene-analyzers"
name="Lucene Analysers"
- version="2.3.0"
+ version="2.4.0"
provider-name="org.apache.lucene">
<runtime>
- <library name="lucene-analyzers-2.3.0.jar">
+ <library name="lucene-analyzers-2.4.0.jar">
<export name="*"/>
</library>
</runtime>