You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2015/04/21 09:43:32 UTC
svn commit: r1675058 - in /nutch/trunk: CHANGES.txt
src/java/org/apache/nutch/segment/SegmentMerger.java
Author: markus
Date: Tue Apr 21 07:43:32 2015
New Revision: 1675058
URL: http://svn.apache.org/r1675058
Log:
NUTCH-1697 SegmentMerger to implement Tool
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1675058&r1=1675057&r2=1675058&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Apr 21 07:43:32 2015
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development 1.10-SNAPSHOT
+* NUTCH-1697 SegmentMerger to implement Tool (markus, snagel)
+
* NUTCH-1987 - Make bin/crawl indexer agnostic (Michael Joyce, snagel via mattmann)
* NUTCH-1854 bin/crawl fails with a parsing fetcher (Asitang Mishra via snagel)
Modified: nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java?rev=1675058&r1=1675057&r2=1675058&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java Tue Apr 21 07:43:32 2015
@@ -51,6 +51,8 @@ import org.apache.hadoop.mapred.Sequence
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.SequenceFileRecordReader;
import org.apache.hadoop.util.Progressable;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Generator;
import org.apache.nutch.metadata.MetaWrapper;
@@ -118,7 +120,7 @@ import org.apache.nutch.util.NutchJob;
*
* @author Andrzej Bialecki
*/
-public class SegmentMerger extends Configured implements
+public class SegmentMerger extends Configured implements Tool,
Mapper<Text, MetaWrapper, Text, MetaWrapper>,
Reducer<Text, MetaWrapper, Text, MetaWrapper> {
private static final Logger LOG = LoggerFactory
@@ -691,7 +693,7 @@ public class SegmentMerger extends Confi
/**
* @param args
*/
- public static void main(String[] args) throws Exception {
+ public int run(String[] args) throws Exception {
if (args.length < 2) {
System.err
.println("SegmentMerger output_dir (-dir segments | seg1 seg2 ...) [-filter] [-slice NNNN]");
@@ -706,7 +708,7 @@ public class SegmentMerger extends Confi
.println("\t-normalize\t\tnormalize URL via current URLNormalizers");
System.err
.println("\t-slice NNNN\tcreate many output segments, each containing NNNN URLs");
- return;
+ return -1;
}
Configuration conf = NutchConfiguration.create();
final FileSystem fs = FileSystem.get(conf);
@@ -734,11 +736,18 @@ public class SegmentMerger extends Confi
}
if (segs.size() == 0) {
System.err.println("ERROR: No input segments.");
- return;
+ return -1;
}
- SegmentMerger merger = new SegmentMerger(conf);
- merger.merge(out, segs.toArray(new Path[segs.size()]), filter, normalize,
+
+ merge(out, segs.toArray(new Path[segs.size()]), filter, normalize,
sliceSize);
+ return 0;
+ }
+
+ public static void main(String[] args) throws Exception {
+ int result = ToolRunner.run(NutchConfiguration.create(),
+ new SegmentMerger(), args);
+ System.exit(result);
}
}