You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2011/10/03 12:57:35 UTC

svn commit: r1178376 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/crawl/LinkDb.java

Author: markus
Date: Mon Oct  3 10:57:33 2011
New Revision: 1178376

URL: http://svn.apache.org/viewvc?rev=1178376&view=rev
Log:
NUTCH-1137 LinkDb: other command-line options were ignored when -dir was used

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1178376&r1=1178375&r2=1178376&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Mon Oct  3 10:57:33 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 1.4 - Current development
 
+* NUTCH-1137 LinkDB invertlinks other options ignored when using -dir option (Sebastian Nagel, markus)
+
 * NUTCH-1141 Configurable Fetcher queue depth (jnioche)
 
 * NUTCH-1091 Remove commons logging dependency from Nutch branch and trunk (lewismc)

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java?rev=1178376&r1=1178375&r2=1178376&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java Mon Oct  3 10:57:33 2011
@@ -150,7 +150,6 @@ public class LinkDb extends Configured i
   }
 
   public void invert(Path linkDb, Path[] segments, boolean normalize, boolean filter, boolean force) throws IOException {
-
     Path lock = new Path(linkDb, LOCK_NAME);
     FileSystem fs = FileSystem.get(getConf());
     LockUtil.createLockFile(fs, lock, force);
@@ -164,6 +163,7 @@ public class LinkDb extends Configured i
       LOG.info("LinkDb: URL normalize: " + normalize);
       LOG.info("LinkDb: URL filter: " + filter);
     }
+
     JobConf job = LinkDb.createJob(getConf(), linkDb, normalize, filter);
     for (int i = 0; i < segments.length; i++) {
       if (LOG.isInfoEnabled()) {
@@ -255,7 +255,7 @@ public class LinkDb extends Configured i
     int res = ToolRunner.run(NutchConfiguration.create(), new LinkDb(), args);
     System.exit(res);
   }
-  
+
   public int run(String[] args) throws Exception {
     if (args.length < 2) {
       System.err.println("Usage: LinkDb <linkdb> (-dir <segmentsDir> | <seg1> <seg2> ...) [-force] [-noNormalize] [-noFilter]");
@@ -276,10 +276,8 @@ public class LinkDb extends Configured i
     boolean force = false;
     for (int i = 1; i < args.length; i++) {
       if (args[i].equals("-dir")) {
-        segDir = new Path(args[++i]);
-        FileStatus[] files = fs.listStatus(segDir, HadoopFSUtil.getPassDirectoriesFilter(fs));
-        if (files != null) segs.addAll(Arrays.asList(HadoopFSUtil.getPaths(files)));
-        break;
+        FileStatus[] paths = fs.listStatus(new Path(args[++i]), HadoopFSUtil.getPassDirectoriesFilter(fs));
+        segs.addAll(Arrays.asList(HadoopFSUtil.getPaths(paths)));
       } else if (args[i].equalsIgnoreCase("-noNormalize")) {
         normalize = false;
       } else if (args[i].equalsIgnoreCase("-noFilter")) {