You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2016/12/01 10:58:43 UTC
nutch git commit: NUTCH-2336 SegmentReader to implement Tool (contributed by Vincent Slot), closes #159
Repository: nutch
Updated Branches:
refs/heads/master 24cc2aa9c -> 6e051f2cc
NUTCH-2336 SegmentReader to implement Tool (contributed by Vincent Slot), closes #159
Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/6e051f2c
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/6e051f2c
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/6e051f2c
Branch: refs/heads/master
Commit: 6e051f2ccadba6c6bac60ee8708ced958a30cc8b
Parents: 24cc2aa
Author: Sebastian Nagel <sn...@apache.org>
Authored: Wed Nov 30 17:05:15 2016 +0100
Committer: Sebastian Nagel <sn...@apache.org>
Committed: Thu Dec 1 11:57:32 2016 +0100
----------------------------------------------------------------------
.../org/apache/nutch/segment/SegmentReader.java | 45 ++++++++++++++------
1 file changed, 33 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/nutch/blob/6e051f2c/src/java/org/apache/nutch/segment/SegmentReader.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/segment/SegmentReader.java b/src/java/org/apache/nutch/segment/SegmentReader.java
index d00d1e2..9ea20be 100644
--- a/src/java/org/apache/nutch/segment/SegmentReader.java
+++ b/src/java/org/apache/nutch/segment/SegmentReader.java
@@ -59,6 +59,8 @@ import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.Progressable;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.NutchWritable;
import org.apache.nutch.parse.ParseData;
@@ -69,7 +71,7 @@ import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
/** Dump the content of a segment. */
-public class SegmentReader extends Configured implements
+public class SegmentReader extends Configured implements Tool,
Reducer<Text, NutchWritable, Text, Text> {
public static final Logger LOG = LoggerFactory.getLogger(SegmentReader.class);
@@ -574,10 +576,10 @@ public class SegmentReader extends Configured implements
private static final int MODE_GET = 2;
- public static void main(String[] args) throws Exception {
+ public int run(String[] args) throws Exception {
if (args.length < 2) {
usage();
- return;
+ return -1;
}
int mode = -1;
if (args[0].equals("-dump"))
@@ -622,20 +624,33 @@ public class SegmentReader extends Configured implements
// collect required args
switch (mode) {
case MODE_DUMP:
+
+ this.co = co;
+ this.fe = fe;
+ this.ge = ge;
+ this.pa = pa;
+ this.pd = pd;
+ this.pt = pt;
+ try {
+ this.fs = FileSystem.get(getConf());
+ } catch (IOException e) {
+ LOG.error("IOException:", e);
+ }
+
String input = args[1];
if (input == null) {
System.err.println("Missing required argument: <segment_dir>");
usage();
- return;
+ return -1;
}
String output = args.length > 2 ? args[2] : null;
if (output == null) {
System.err.println("Missing required argument: <output>");
usage();
- return;
+ return -1;
}
- segmentReader.dump(new Path(input), new Path(output));
- return;
+ dump(new Path(input), new Path(output));
+ return 0;
case MODE_LIST:
ArrayList<Path> dirs = new ArrayList<Path>();
for (int i = 1; i < args.length; i++) {
@@ -653,27 +668,27 @@ public class SegmentReader extends Configured implements
dirs.add(new Path(args[i]));
}
segmentReader.list(dirs, new OutputStreamWriter(System.out, "UTF-8"));
- return;
+ return 0;
case MODE_GET:
input = args[1];
if (input == null) {
System.err.println("Missing required argument: <segment_dir>");
usage();
- return;
+ return -1;
}
String key = args.length > 2 ? args[2] : null;
if (key == null) {
System.err.println("Missing required argument: <keyValue>");
usage();
- return;
+ return -1;
}
segmentReader.get(new Path(input), new Text(key), new OutputStreamWriter(
System.out, "UTF-8"), new HashMap<String, List<Writable>>());
- return;
+ return 0;
default:
System.err.println("Invalid operation: " + args[0]);
usage();
- return;
+ return -1;
}
}
@@ -716,4 +731,10 @@ public class SegmentReader extends Configured implements
System.err
.println("\t\tNote: put double-quotes around strings with spaces.");
}
+
+ public static void main(String[] args) throws Exception {
+ int result = ToolRunner.run(NutchConfiguration.create(),
+ new SegmentReader(), args);
+ System.exit(result);
+ }
}