You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2012/06/12 12:41:51 UTC
svn commit: r1349240 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
Author: markus
Date: Tue Jun 12 10:41:51 2012
New Revision: 1349240
URL: http://svn.apache.org/viewvc?rev=1349240&view=rev
Log:
NUTCH-1330 WebGraph OutlinkDB to preserve backup
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1349240&r1=1349239&r2=1349240&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jun 12 10:41:51 2012
@@ -2,6 +2,8 @@ Nutch Change Log
(trunk) Current Development:
+* NUTCH-1330 WebGraph OutlinkDB to preserve back up (markus)
+
* NUTCH-1319 HostNormalizer plugin (markus)
* NUTCH-1386 Headings filter not to add empty values (markus)
Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java?rev=1349240&r1=1349239&r2=1349240&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java Tue Jun 12 10:41:51 2012
@@ -100,7 +100,8 @@ public class WebGraph
public static final Logger LOG = LoggerFactory.getLogger(WebGraph.class);
public static final String LOCK_NAME = ".locked";
public static final String INLINK_DIR = "inlinks";
- public static final String OUTLINK_DIR = "outlinks";
+ public static final String OUTLINK_DIR = "outlinks/current";
+ public static final String OLD_OUTLINK_DIR = "outlinks/old";
public static final String NODE_DIR = "nodes";
/**
@@ -554,6 +555,7 @@ public class WebGraph
// outlink and temp outlink database paths
Path outlinkDb = new Path(webGraphDb, OUTLINK_DIR);
+ Path oldOutlinkDb = new Path(webGraphDb, OLD_OUTLINK_DIR);
if (!fs.exists(outlinkDb)) {
fs.mkdirs(outlinkDb);
@@ -565,6 +567,7 @@ public class WebGraph
outlinkJob.setJobName("Outlinkdb: " + outlinkDb);
boolean deleteGone = conf.getBoolean("link.delete.gone", false);
+ boolean preserveBackup = conf.getBoolean("db.preserve.backup", true);
if (deleteGone) {
LOG.info("OutlinkDb: deleting gone links");
@@ -612,7 +615,9 @@ public class WebGraph
LOG.info("OutlinkDb: running");
JobClient.runJob(outlinkJob);
LOG.info("OutlinkDb: installing " + outlinkDb);
+ FSUtils.replace(fs, oldOutlinkDb, outlinkDb, true);
FSUtils.replace(fs, outlinkDb, tempOutlinkDb, true);
+ if (!preserveBackup && fs.exists(oldOutlinkDb)) fs.delete(oldOutlinkDb, true);
LOG.info("OutlinkDb: finished");
}
catch (IOException e) {