You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by do...@apache.org on 2007/07/11 17:30:33 UTC
svn commit: r555307 - in /lucene/nutch/trunk: CHANGES.txt
src/java/org/apache/nutch/indexer/IndexMerger.java
Author: dogacan
Date: Wed Jul 11 08:30:29 2007
New Revision: 555307
URL: http://svn.apache.org/viewvc?view=rev&rev=555307
Log:
NUTCH-510 - IndexMerger delete working dir. Contributed by Enis.
Modified:
lucene/nutch/trunk/CHANGES.txt
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java
Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?view=diff&rev=555307&r1=555306&r2=555307
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Wed Jul 11 08:30:29 2007
@@ -83,6 +83,8 @@
27. NUTCH-505 - Outlink urls should be validated. (dogacan)
+28. NUTCH-510 - IndexMerger delete working dir. (Enis Soztutar via dogacan)
+
Release 0.9 - 2007-04-02
1. Changed log4j configuration to log to stdout on commandline
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java?view=diff&rev=555307&r1=555306&r2=555307
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java Wed Jul 11 08:30:29 2007
@@ -24,6 +24,7 @@
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.*;
+import org.apache.hadoop.mapred.FileAlreadyExistsException;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolBase;
import org.apache.hadoop.conf.*;
@@ -58,13 +59,9 @@
* Merge all input indexes to the single output index
*/
public void merge(Path[] indexes, Path outputIndex, Path localWorkingDir) throws IOException {
- if (LOG.isInfoEnabled()) {
- LOG.info("merging indexes to: " + outputIndex);
- }
- FileSystem localFs = FileSystem.getLocal(getConf());
- if (localWorkingDir == null) {
- localWorkingDir = new Path("indexmerger-" + System.currentTimeMillis());
- }
+ LOG.info("merging indexes to: " + outputIndex);
+
+ FileSystem localFs = FileSystem.getLocal(getConf());
if (localFs.exists(localWorkingDir)) {
localFs.delete(localWorkingDir);
}
@@ -73,6 +70,10 @@
// Get local output target
//
FileSystem fs = FileSystem.get(getConf());
+ if (fs.exists(outputIndex)) {
+ throw new FileAlreadyExistsException("Output directory " + outputIndex + " already exists!");
+ }
+
Path tmpLocalOutput = new Path(localWorkingDir, "merge-output");
Path localOutput = fs.startLocalOutput(outputIndex, tmpLocalOutput);
@@ -83,8 +84,6 @@
}
//
-
- //
// Merge indices
//
IndexWriter writer = new IndexWriter(localOutput.toString(), null, true);
@@ -102,8 +101,7 @@
// Put target back
//
fs.completeLocalOutput(outputIndex, tmpLocalOutput);
- FileSystem.getLocal(conf).delete(localWorkingDir);
- if (LOG.isInfoEnabled()) { LOG.info("done merging"); }
+ LOG.info("done merging");
}
/**
@@ -125,9 +123,9 @@
// Parse args, read all index directories to be processed
//
FileSystem fs = FileSystem.get(conf);
- List indexDirs = new ArrayList();
+ List<Path> indexDirs = new ArrayList<Path>();
- Path workDir = null;
+ Path workDir = new Path("indexmerger-" + System.currentTimeMillis());
int i = 0;
if ("-workingdir".equals(args[i])) {
i++;
@@ -152,6 +150,8 @@
} catch (Exception e) {
LOG.fatal("IndexMerger: " + StringUtils.stringifyException(e));
return -1;
+ } finally {
+ FileSystem.getLocal(conf).delete(workDir);
}
}
}