You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ku...@apache.org on 2008/12/02 15:41:10 UTC

svn commit: r722475 - in /lucene/nutch/trunk: ./ lib/ src/java/org/apache/nutch/indexer/ src/plugin/lib-lucene-analyzers/ src/plugin/lib-lucene-analyzers/lib/

Author: kubes
Date: Tue Dec  2 06:41:09 2008
New Revision: 722475

URL: http://svn.apache.org/viewvc?rev=722475&view=rev
Log:
NUTCH-662: Upgrade Nutch to use Lucene 2.4

Added:
    lucene/nutch/trunk/lib/lucene-core-2.4.0.jar   (with props)
    lucene/nutch/trunk/lib/lucene-misc-2.4.0.jar   (with props)
    lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.4.0.jar   (with props)
Removed:
    lucene/nutch/trunk/lib/lucene-core-2.3.0.jar
    lucene/nutch/trunk/lib/lucene-misc-2.3.0.jar
    lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.3.0.jar
Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/FsDirectory.java
    lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=722475&r1=722474&r2=722475&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Tue Dec  2 06:41:09 2008
@@ -283,7 +283,9 @@
 
 104. NUTCH-640 - confusing description "set it to Integer.MAX_VALUE".
      (dogacan)
-
+     
+105. NUTCH-662 - Upgrade Nutch to use Lucene 2.4. (kubes)
+     
 Release 0.9 - 2007-04-02
 
  1. Changed log4j confiquration to log to stdout on commandline

Added: lucene/nutch/trunk/lib/lucene-core-2.4.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/lucene-core-2.4.0.jar?rev=722475&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/lib/lucene-core-2.4.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/lib/lucene-misc-2.4.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/lucene-misc-2.4.0.jar?rev=722475&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/lib/lucene-misc-2.4.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java?rev=722475&r1=722474&r2=722475&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java Tue Dec  2 06:41:09 2008
@@ -145,10 +145,11 @@
     /** Return each index as a split. */
     public InputSplit[] getSplits(JobConf job, int numSplits)
       throws IOException {
-      Path[] files = listPaths(job);
+      FileStatus[] files = listStatus(job);
       InputSplit[] splits = new InputSplit[files.length];
       for (int i = 0; i < files.length; i++) {
-        splits[i] = new FileSplit(files[i], 0, INDEX_LENGTH, (String[])null);
+        FileStatus cur = files[i];
+        splits[i] = new FileSplit(cur.getPath(), 0, INDEX_LENGTH, (String[])null);
       }
       return splits;
     }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/FsDirectory.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/FsDirectory.java?rev=722475&r1=722474&r2=722475&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/FsDirectory.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/FsDirectory.java Tue Dec  2 06:41:09 2008
@@ -18,6 +18,8 @@
 package org.apache.nutch.indexer;
 
 import java.io.*;
+import java.util.Random;
+
 import org.apache.lucene.store.*;
 import org.apache.nutch.util.HadoopFSUtil;
 import org.apache.hadoop.fs.*;
@@ -207,30 +209,45 @@
 
   private class DfsIndexOutput extends BufferedIndexOutput {
     private FSDataOutputStream out;
+    private RandomAccessFile local;
+    private File localFile;
 
     public DfsIndexOutput(Path path, int ioFileBufferSize) throws IOException {
+      
+      // create a temporary local file and set it to delete on exit
+      String randStr = Integer.toString(new Random().nextInt(Integer.MAX_VALUE));
+      localFile = File.createTempFile("index_" + randStr, ".tmp");
+      localFile.deleteOnExit();
+      local = new RandomAccessFile(localFile, "rw");
+
       out = fs.create(path);
     }
 
     public void flushBuffer(byte[] b, int offset, int size) throws IOException {
-      out.write(b, offset, size);
+      local.write(b, offset, size);
     }
 
     public void close() throws IOException {
       super.close();
+      
+      // transfer to dfs from local
+      byte[] buffer = new byte[4096];
+      local.seek(0);
+      int read = -1;
+      while ((read = local.read(buffer)) != -1) {
+        out.write(buffer, 0, read);
+      }
       out.close();
+      local.close();
     }
 
     public void seek(long pos) throws IOException {
-      throw new UnsupportedOperationException();
+      super.seek(pos);
+      local.seek(pos);
     }
 
     public long length() throws IOException {
-      return out.getPos();
-    }
-
-    protected void finalize() throws IOException {
-      out.close();                                // close the file
+      return local.length();
     }
 
   }

Added: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.4.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.4.0.jar?rev=722475&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.4.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml?rev=722475&r1=722474&r2=722475&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml Tue Dec  2 06:41:09 2008
@@ -25,11 +25,11 @@
 <plugin
    id="lib-lucene-analyzers"
    name="Lucene Analysers"
-   version="2.3.0"
+   version="2.4.0"
    provider-name="org.apache.lucene">
 
    <runtime>
-     <library name="lucene-analyzers-2.3.0.jar">
+     <library name="lucene-analyzers-2.4.0.jar">
         <export name="*"/>
      </library>
    </runtime>