You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@nutch.apache.org by te...@apache.org on 2014/01/02 20:40:18 UTC

svn commit: r1554883 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/crawl/CrawlDbMerger.java

Author: tejasp
Date: Thu Jan  2 19:40:18 2014
New Revision: 1554883

URL: http://svn.apache.org/r1554883
Log:
NUTCH-1670 set same crawldb directory in mergedb parameter

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1554883&r1=1554882&r2=1554883&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Jan  2 19:40:18 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Development Trunk
 
+* NUTCH-1670 set same crawldb directory in mergedb parameter (lufeng via tejasp)
+
 * NUTCH-1080 Type safe members, arguments for better readability (tejasp)
 
 * NUTCH-1360 Suport the storing of IP address connected to when web crawling (lewismc, ferdy and Yasin Kılınç)

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java?rev=1554883&r1=1554882&r2=1554883&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java Thu Jan  2 19:40:18 2014
@@ -125,6 +125,8 @@ public class CrawlDbMerger extends Confi
     }
     JobClient.runJob(job);
     FileSystem fs = FileSystem.get(getConf());
+    if(fs.exists(output))
+      fs.delete(output,true);
     fs.mkdirs(output);
     fs.rename(FileOutputFormat.getOutputPath(job), new Path(output, CrawlDb.CURRENT_NAME));
     long end = System.currentTimeMillis();