You are viewing a plain text version of this content. The canonical link to the original was not preserved in this plain text copy.
Posted to commits@nutch.apache.org by ma...@apache.org on 2015/04/21 09:43:32 UTC

svn commit: r1675058 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/segment/SegmentMerger.java

Author: markus
Date: Tue Apr 21 07:43:32 2015
New Revision: 1675058

URL: http://svn.apache.org/r1675058
Log:
NUTCH-1697 SegmentMerger to implement Tool

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1675058&r1=1675057&r2=1675058&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Apr 21 07:43:32 2015
@@ -2,6 +2,8 @@ Nutch Change Log
  
 Nutch Current Development 1.10-SNAPSHOT
 
+* NUTCH-1697 SegmentMerger to implement Tool (markus, snagel)
+
 * NUTCH-1987 - Make bin/crawl indexer agnostic (Michael Joyce, snagel via mattmann)
  
 * NUTCH-1854 bin/crawl fails with a parsing fetcher (Asitang Mishra via snagel)

Modified: nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java?rev=1675058&r1=1675057&r2=1675058&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java Tue Apr 21 07:43:32 2015
@@ -51,6 +51,8 @@ import org.apache.hadoop.mapred.Sequence
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.hadoop.mapred.SequenceFileRecordReader;
 import org.apache.hadoop.util.Progressable;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.crawl.Generator;
 import org.apache.nutch.metadata.MetaWrapper;
@@ -118,7 +120,7 @@ import org.apache.nutch.util.NutchJob;
  * 
  * @author Andrzej Bialecki
  */
-public class SegmentMerger extends Configured implements
+public class SegmentMerger extends Configured implements Tool,
     Mapper<Text, MetaWrapper, Text, MetaWrapper>,
     Reducer<Text, MetaWrapper, Text, MetaWrapper> {
   private static final Logger LOG = LoggerFactory
@@ -691,7 +693,7 @@ public class SegmentMerger extends Confi
   /**
    * @param args
    */
-  public static void main(String[] args) throws Exception {
+  public int run(String[] args)  throws Exception {
     if (args.length < 2) {
       System.err
           .println("SegmentMerger output_dir (-dir segments | seg1 seg2 ...) [-filter] [-slice NNNN]");
@@ -706,7 +708,7 @@ public class SegmentMerger extends Confi
           .println("\t-normalize\t\tnormalize URL via current URLNormalizers");
       System.err
           .println("\t-slice NNNN\tcreate many output segments, each containing NNNN URLs");
-      return;
+      return -1;
     }
     Configuration conf = NutchConfiguration.create();
     final FileSystem fs = FileSystem.get(conf);
@@ -734,11 +736,18 @@ public class SegmentMerger extends Confi
     }
     if (segs.size() == 0) {
       System.err.println("ERROR: No input segments.");
-      return;
+      return -1;
     }
-    SegmentMerger merger = new SegmentMerger(conf);
-    merger.merge(out, segs.toArray(new Path[segs.size()]), filter, normalize,
+
+    merge(out, segs.toArray(new Path[segs.size()]), filter, normalize,
         sliceSize);
+    return 0;
+  }
+
+  public static void main(String[] args) throws Exception {
+    int result = ToolRunner.run(NutchConfiguration.create(),
+        new SegmentMerger(), args);
+    System.exit(result);
   }
 
 }