You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2012/06/12 12:41:51 UTC

svn commit: r1349240 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/scoring/webgraph/WebGraph.java

Author: markus
Date: Tue Jun 12 10:41:51 2012
New Revision: 1349240

URL: http://svn.apache.org/viewvc?rev=1349240&view=rev
Log:
NUTCH-1330 WebGraph OutlinkDB to preserve back up

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1349240&r1=1349239&r2=1349240&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jun 12 10:41:51 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 (trunk) Current Development:
 
+* NUTCH-1330 WebGraph OutlinkDB to preserve back up (markus)
+
 * NUTCH-1319 HostNormalizer plugin (markus)
 
 * NUTCH-1386 Headings filter not to add empty values (markus)

Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java?rev=1349240&r1=1349239&r2=1349240&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java Tue Jun 12 10:41:51 2012
@@ -100,7 +100,8 @@ public class WebGraph
   public static final Logger LOG = LoggerFactory.getLogger(WebGraph.class);
   public static final String LOCK_NAME = ".locked";
   public static final String INLINK_DIR = "inlinks";
-  public static final String OUTLINK_DIR = "outlinks";
+  public static final String OUTLINK_DIR = "outlinks/current";
+  public static final String OLD_OUTLINK_DIR = "outlinks/old";
   public static final String NODE_DIR = "nodes";
 
   /**
@@ -554,6 +555,7 @@ public class WebGraph
 
     // outlink and temp outlink database paths
     Path outlinkDb = new Path(webGraphDb, OUTLINK_DIR);
+    Path oldOutlinkDb = new Path(webGraphDb, OLD_OUTLINK_DIR);
 
     if (!fs.exists(outlinkDb)) {
       fs.mkdirs(outlinkDb);
@@ -565,6 +567,7 @@ public class WebGraph
     outlinkJob.setJobName("Outlinkdb: " + outlinkDb);
 
     boolean deleteGone = conf.getBoolean("link.delete.gone", false);
+    boolean preserveBackup = conf.getBoolean("db.preserve.backup", true);
 
     if (deleteGone) {
       LOG.info("OutlinkDb: deleting gone links");
@@ -612,7 +615,9 @@ public class WebGraph
       LOG.info("OutlinkDb: running");
       JobClient.runJob(outlinkJob);
       LOG.info("OutlinkDb: installing " + outlinkDb);
+      FSUtils.replace(fs, oldOutlinkDb, outlinkDb, true);
       FSUtils.replace(fs, outlinkDb, tempOutlinkDb, true);
+      if (!preserveBackup && fs.exists(oldOutlinkDb)) fs.delete(oldOutlinkDb, true);
       LOG.info("OutlinkDb: finished");
     }
     catch (IOException e) {