You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@nutch.apache.org by naegelejd <gi...@git.apache.org> on 2016/05/27 19:42:43 UTC

[GitHub] nutch pull request: NUTCH-2184 Enable IndexingJob to function with...

Github user naegelejd commented on a diff in the pull request:

    https://github.com/apache/nutch/pull/95#discussion_r64957448
  
    --- Diff: src/java/org/apache/nutch/indexer/IndexingJob.java ---
    @@ -155,43 +161,146 @@ public void index(Path crawlDb, Path linkDb, List<Path> segments,
                 counter.getName());
           }
           long end = System.currentTimeMillis();
    -      LOG.info("Indexer: finished at " + sdf.format(end) + ", elapsed: "
    -          + TimingUtil.elapsedTime(start, end));
    +      LOG.info("Indexer: finished at {}, elapsed: {}", sdf.format(end),
    +          TimingUtil.elapsedTime(start, end));
         } finally {
           FileSystem.get(job).delete(tmp, true);
         }
       }
     
       public int run(String[] args) throws Exception {
    -    if (args.length < 2) {
    -      System.err
    -      //.println("Usage: Indexer <crawldb> [-linkdb <linkdb>] [-params k1=v1&k2=v2...] (<segment> ... | -dir <segments>) [-noCommit] [-deleteGone] [-filter] [-normalize]");
    -      .println("Usage: Indexer <crawldb> [-linkdb <linkdb>] [-params k1=v1&k2=v2...] (<segment> ... | -dir <segments>) [-noCommit] [-deleteGone] [-filter] [-normalize] [-addBinaryContent] [-base64]");
    -      IndexWriters writers = new IndexWriters(getConf());
    -      System.err.println(writers.describe());
    -      return -1;
    -    }
    -
    -    final Path crawlDb = new Path(args[0]);
    -    Path linkDb = null;
    -
    -    final List<Path> segments = new ArrayList<Path>();
    -    String params = null;
    -
    -    boolean noCommit = false;
    -    boolean deleteGone = false;
    -    boolean filter = false;
    -    boolean normalize = false;
    -    boolean addBinaryContent = false;
    -    boolean base64 = false;
    +    // boolean options
    +    Option helpOpt = new Option("h", "help", false, "show this help message");
    +    // argument options
    +    @SuppressWarnings("static-access")
    +    Option crawldbOpt = OptionBuilder
    +    .withArgName("crawldb")
    +    .hasArg()
    +    .withDescription(
    +        "a crawldb directory to use with this tool (optional)")
    +    .create("crawldb");
    +    @SuppressWarnings("static-access")
    +    Option linkdbOpt = OptionBuilder
    +    .withArgName("linkdb")
    +    .hasArg()
    +    .withDescription(
    +        "a linkdb directory to use with this tool (optional)")
    +    .create("linkdb");
    +    @SuppressWarnings("static-access")
    +    Option paramsOpt = OptionBuilder
    +    .withArgName("params")
    +    .hasArg()
    +    .withDescription(
    +        "key value parameters to be used with this tool e.g. k1=v1&k2=v2... (optional)")
    +    .create("params");
    +    @SuppressWarnings("static-access")
    +    Option segOpt = OptionBuilder
    +    .withArgName("segment")
    +    .hasArgs()
    +    .withDescription("the segment(s) to use (either this or --segmentDir is mandatory)")
    +    .create("segment");
    +    @SuppressWarnings("static-access")
    +    Option segmentDirOpt = OptionBuilder
    +    .withArgName("segmentDir")
    +    .hasArg()
    +    .withDescription(
    +        "directory containing one or more segments to be used with this tool "
    +            + "(either this or --segment is mandatory)")
    +    .create("segmentDir");
    +    @SuppressWarnings("static-access")
    +    Option noCommitOpt = OptionBuilder
    +    .withArgName("noCommit")
    +    .withDescription(
    +        "do the commits once and for all the reducers in one go (optional)")
    --- End diff --
    
    This description is backward: the "-noCommit" option tells the Indexer *not* to do a final commit after the job finishes.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---