You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2014/04/14 20:33:49 UTC

svn commit: r1587275 - in /nutch/branches/2.x: ./ conf/ src/java/org/apache/nutch/api/

Author: lewismc
Date: Mon Apr 14 18:33:48 2014
New Revision: 1587275

URL: http://svn.apache.org/r1587275
Log:
NUTCH-1731 Better cmd line parsing for NutchServer

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/conf/log4j.properties
    nutch/branches/2.x/src/java/org/apache/nutch/api/AdminResource.java
    nutch/branches/2.x/src/java/org/apache/nutch/api/JobManager.java
    nutch/branches/2.x/src/java/org/apache/nutch/api/NutchApp.java
    nutch/branches/2.x/src/java/org/apache/nutch/api/NutchServer.java
    nutch/branches/2.x/src/java/org/apache/nutch/api/Params.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1587275&r1=1587274&r2=1587275&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon Apr 14 18:33:48 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development
 
+* NUTCH-1731 Better cmd line parsing for NutchServer (Fjodor Vershinin via lewismc)
+
 * NUTCH-1751 Empty anchors should not index (Sertac TURKEL via lewismc)
 
 * NUTCH-1733 parse-html to support HTML5 charset definitions (snagel)

Modified: nutch/branches/2.x/conf/log4j.properties
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/conf/log4j.properties?rev=1587275&r1=1587274&r2=1587275&view=diff
==============================================================================
--- nutch/branches/2.x/conf/log4j.properties (original)
+++ nutch/branches/2.x/conf/log4j.properties Mon Apr 14 18:33:48 2014
@@ -40,6 +40,7 @@ log4j.logger.org.apache.nutch.host.HostD
 log4j.logger.org.apache.nutch.parse.ParserChecker=INFO,cmdstdout
 log4j.logger.org.apache.nutch.indexer.IndexingFiltersChecker=INFO,cmdstdout
 log4j.logger.org.apache.nutch.plugin.PluginRepository=WARN
+log4j.logger.org.apache.nutch.api.NutchServer=INFO,cmdstdout
 
 log4j.logger.org.apache.nutch=INFO
 log4j.logger.org.apache.hadoop=WARN

Modified: nutch/branches/2.x/src/java/org/apache/nutch/api/AdminResource.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/api/AdminResource.java?rev=1587275&r1=1587274&r2=1587275&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/api/AdminResource.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/api/AdminResource.java Mon Apr 14 18:33:48 2014
@@ -16,16 +16,21 @@
  ******************************************************************************/
 package org.apache.nutch.api;
 
-import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.commons.lang.BooleanUtils;
+import org.apache.commons.lang.StringUtils;
 import org.apache.nutch.api.JobStatus.State;
 import org.restlet.resource.Get;
 import org.restlet.resource.ServerResource;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.collect.Maps;
+
 public class AdminResource extends ServerResource {
+  private static final int SHUTDOWN_DELAY = 1000;
+
   private static final Logger LOG = LoggerFactory.getLogger(AdminResource.class);
 
   public static final String PATH = "admin";
@@ -33,41 +38,57 @@ public class AdminResource extends Serve
 
   @Get("json")
   public Object execute() throws Exception {
-    String cmd = (String)getRequestAttributes().get(Params.CMD);
-    if ("status".equalsIgnoreCase(cmd)) {
-      // status
-      Map<String,Object> res = new HashMap<String,Object>();
-      res.put("started", NutchApp.started);
-      Map<String,Object> jobs = new HashMap<String,Object>();      
-      jobs.put("all", NutchApp.jobMgr.list(null, State.ANY));
-      jobs.put("running", NutchApp.jobMgr.list(null, State.RUNNING));
-      res.put("jobs", jobs);
-      res.put("confs", NutchApp.confMgr.list());
-      return res;
-    } else if ("stop".equalsIgnoreCase(cmd)) {
-      // stop
-      if (NutchApp.server.canStop()) {
-        Thread t = new Thread() {
-          public void run() {
-            try {
-              Thread.sleep(1000);
-              NutchApp.server.stop(false);
-              LOG.info("Service stopped.");
-            } catch (Exception e) {
-              LOG.error("Error stopping", e);
-            };
-          }
-        };
-        t.setDaemon(true);
-        t.start();
-        LOG.info("Service shutting down...");
-        return "stopping";
-      } else {
-        LOG.info("Command 'stop' denied due to unfinished jobs");
-        return "can't stop now";
-      }
-    } else {
-      return "Unknown command " + cmd;
+    Map<String, Object> attributes = getRequestAttributes();
+    String cmd = (String) attributes.get(Params.CMD);
+
+    if (StringUtils.equalsIgnoreCase("status", cmd)) {
+      return getNutchStatus();
+    }
+    
+    if (StringUtils.equalsIgnoreCase("stop", cmd)) {
+      boolean force = BooleanUtils.toBoolean(getQuery().getFirstValue(Params.FORCE));
+      return stopServer(force);
+    }
+    return "Unknown command " + cmd;
+  }
+
+  private String stopServer(boolean force) throws Exception {
+    if (!canStopServer(force)) {
+      LOG.info("Command 'stop' denied due to unfinished jobs");
+      return "can't stop now";
     }
+    
+    Thread t = new Thread() {
+      public void run() {
+        try {
+          Thread.sleep(SHUTDOWN_DELAY);
+          NutchApp.server.stop(false);
+          LOG.info("Service stopped.");
+        } catch (Exception e) {
+          LOG.error("Error stopping", e);
+        }
+      }
+    };
+    t.setDaemon(true);
+    t.start();
+    LOG.info("Service shutting down...");
+    return "stopping";
+  }
+
+  private boolean canStopServer(boolean force) throws Exception {
+    return force || NutchApp.server.canStop();
+  }
+
+  private Map<String, Object> getNutchStatus() throws Exception {
+    Map<String, Object> res = Maps.newHashMap();
+    res.put("started", NutchApp.started);
+
+    Map<String, Object> jobs = Maps.newHashMap();
+    jobs.put("all", NutchApp.jobMgr.list(null, State.ANY));
+    jobs.put("running", NutchApp.jobMgr.list(null, State.RUNNING));
+    res.put("jobs", jobs);
+    res.put("confs", NutchApp.confMgr.list());
+    
+    return res;
   }
 }

Modified: nutch/branches/2.x/src/java/org/apache/nutch/api/JobManager.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/api/JobManager.java?rev=1587275&r1=1587274&r2=1587275&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/api/JobManager.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/api/JobManager.java Mon Apr 14 18:33:48 2014
@@ -18,8 +18,6 @@ package org.apache.nutch.api;
 
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
-
 import org.apache.nutch.api.JobStatus.State;
 
 public interface JobManager {

Modified: nutch/branches/2.x/src/java/org/apache/nutch/api/NutchApp.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/api/NutchApp.java?rev=1587275&r1=1587274&r2=1587275&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/api/NutchApp.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/api/NutchApp.java Mon Apr 14 18:33:48 2014
@@ -16,6 +16,8 @@
  ******************************************************************************/
 package org.apache.nutch.api;
 
+import java.util.logging.Level;
+
 import org.apache.nutch.api.impl.RAMConfManager;
 import org.apache.nutch.api.impl.RAMJobManager;
 import org.restlet.Application;
@@ -41,7 +43,7 @@ public class NutchApp extends Applicatio
       getTunnelService().setEnabled(true);
       getTunnelService().setExtensionsTunnel(true);
       Router router = new Router(getContext());
-      //router.getLogger().setLevel(Level.FINEST);
+      router.getLogger().setLevel(Level.FINEST);
       // configs
       router.attach("/", APIInfoResource.class);
       router.attach("/" + AdminResource.PATH, AdminResource.class);

Modified: nutch/branches/2.x/src/java/org/apache/nutch/api/NutchServer.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/api/NutchServer.java?rev=1587275&r1=1587274&r2=1587275&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/api/NutchServer.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/api/NutchServer.java Mon Apr 14 18:33:48 2014
@@ -19,77 +19,195 @@ package org.apache.nutch.api;
 import java.util.List;
 import java.util.logging.Level;
 
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.PosixParser;
+import org.apache.commons.lang.StringUtils;
 import org.apache.nutch.api.JobStatus.State;
 import org.restlet.Component;
 import org.restlet.data.Protocol;
+import org.restlet.data.Reference;
+import org.restlet.representation.Representation;
+import org.restlet.resource.ClientResource;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 public class NutchServer {
   private static final Logger LOG = LoggerFactory.getLogger(NutchServer.class);
+
+  private static final String LOCALHOST = "localhost";
+  private static final String DEFAULT_LOG_LEVEL = "INFO";
+  private static final Integer DEFAULT_PORT = 8081;
+  
+  private static String logLevel = DEFAULT_LOG_LEVEL;
+  private static Integer port = DEFAULT_PORT;
+  
+  private static final String CMD_HELP = "help";
+  private static final String CMD_STOP = "stop";
+  private static final String CMD_PORT = "port";
+  private static final String CMD_LOG_LEVEL = "log";
+
   
   private Component component;
   private NutchApp app;
-  private int port;
   private boolean running;
-  
-  public NutchServer(int port) {
-    this.port = port;
-    // Create a new Component. 
+
+  /**
+   * Public constructor which takes no arguments; the port and the logging
+   * granularity are read from static fields. If they are not provided via
+   * {@link org.apache.nutch.api.NutchServer#main(String[])} then they default to
+   * 8081 and 'INFO'; however, best attempts should always be made to specify a
+   * logging level.
+   */
+  public NutchServer() {
+    // Create a new Component.
     component = new Component();
-    //component.getLogger().setLevel(Level.FINEST);
-   
-    // Add a new HTTP server listening on port 8182. 
-    component.getServers().add(Protocol.HTTP, port); 
-   
-    // Attach the application. 
+    component.getLogger().setLevel(Level.parse(logLevel));
+
+    // Add a new HTTP server listening on defined port.
+    component.getServers().add(Protocol.HTTP, port);
+
+    // Attach the application.
     app = new NutchApp();
-    component.getDefaultHost().attach("/nutch", app);
+    component.getDefaultHost().attach("/nutch", app); 
+
     NutchApp.server = this;
   }
-  
+
+  /**
+   * Convenience method to determine whether a Nutch server is running.
+   * 
+   * @return true if a server instance is running.
+   */
   public boolean isRunning() {
     return running;
   }
-  
+
+  /**
+   * Starts the Nutch server printing some logging to the log file.
+   * 
+   * @throws Exception
+   */
   public void start() throws Exception {
-    LOG.info("Starting NutchServer on port " + port + "...");
+    LOG.info("Starting NutchServer on port: {} with logging level: {} ...",
+        port, logLevel);
     component.start();
-    LOG.info("Started NutchServer on port " + port);
+    LOG.info("Started NutchServer on port {}", port);
     running = true;
     NutchApp.started = System.currentTimeMillis();
   }
-  
+
+  /**
+   * Safety and convenience method to determine whether or not it is safe to
+   * shut down the server. We make this assertion by consulting the
+   * {@link org.apache.nutch.api.NutchApp#jobMgr} for a list of jobs with
+   * {@link org.apache.nutch.api.JobStatus#state} equal to 'RUNNING'.
+   * 
+   * @return true if there are no jobs running or false if there are jobs with
+   *         running state.
+   * @throws Exception
+   */
   public boolean canStop() throws Exception {
     List<JobStatus> jobs = NutchApp.jobMgr.list(null, State.RUNNING);
-    if (!jobs.isEmpty()) {
-      return false;
-    }
-    return true;
+    return jobs.isEmpty();
   }
-  
+
+  /**
+   * Stop the Nutch server.
+   * 
+   * @param force
+   *          boolean flag; when true the server stops even if jobs are running.
+   * @return true if no server is running or if the shutdown was successful.
+   *         Return false if there are running jobs and the force switch has not
+   *         been activated.
+   * @throws Exception
+   */
   public boolean stop(boolean force) throws Exception {
-    if (!running) {
+    if (!NutchApp.server.running) {
       return true;
     }
-    if (!canStop() && !force) {
+    if (!NutchApp.server.canStop() && !force) {
       LOG.warn("Running jobs - can't stop now.");
       return false;
     }
-    LOG.info("Stopping NutchServer on port " + port + "...");
+    LOG.info("Stopping NutchServer on port {}...", port);
     component.stop();
-    LOG.info("Stopped NutchServer on port " + port);
+    LOG.info("Stopped NutchServer on port {}", port);
     running = false;
     return true;
   }
 
-  public static void main(String[] args) throws Exception { 
-    if (args.length == 0) {
-      System.err.println("Usage: NutchServer <port>");
-      System.exit(-1);
+  public static void main(String[] args) throws Exception {
+    CommandLineParser parser = new PosixParser();
+    Options options = createOptions();
+    CommandLine commandLine = parser.parse(options, args);
+
+    if (commandLine.hasOption(CMD_HELP)) {
+      HelpFormatter formatter = new HelpFormatter();
+      formatter.printHelp("NutchServer", options, true);
+      return;
+    }
+    
+    if (commandLine.hasOption(CMD_LOG_LEVEL)) {
+      logLevel = commandLine.getOptionValue(CMD_LOG_LEVEL);
+    }
+    
+    if (commandLine.hasOption(CMD_PORT)) {
+      port = Integer.parseInt(commandLine.getOptionValue(CMD_PORT));
+    }
+
+    if (commandLine.hasOption(CMD_STOP)) {
+      String stopParameter = commandLine.getOptionValue(CMD_STOP);
+      boolean force = StringUtils.equals(Params.FORCE, stopParameter);
+      stopRemoteServer(force);
+      return;
     }
-    int port = Integer.parseInt(args[0]);
-    NutchServer server = new NutchServer(port);
+    
+    startServer();
+  }
+  
+  private static void startServer() throws Exception {
+    NutchServer server = new NutchServer();
     server.start();
   }
+  
+  private static void stopRemoteServer(boolean force) throws Exception {
+    Reference reference = new Reference(Protocol.HTTP, LOCALHOST, port);
+    reference.setPath("/nutch/admin/stop");
+    
+    if (force) {
+      reference.addQueryParameter(Params.FORCE, Params.TRUE);
+    }
+    
+    ClientResource clientResource = new ClientResource(reference);
+    Representation response = clientResource.get();
+    LOG.info("Server response: {} ", response.getText());
+  }
+
+  private static Options createOptions() {
+    Options options = new Options();
+    OptionBuilder.hasArg();
+    OptionBuilder.withArgName("logging level");
+    OptionBuilder.withDescription("Select a logging level for the NutchServer: \n"
+        + "ALL|CONFIG|FINER|FINEST|INFO|OFF|SEVERE|WARNING");
+    options.addOption(OptionBuilder.create(CMD_LOG_LEVEL));
+
+    OptionBuilder.withDescription("Stop running NutchServer. "
+        + "true value forces the Server to stop despite running jobs e.g. kills the tasks ");
+    OptionBuilder.hasOptionalArg();
+    OptionBuilder.withArgName("force");
+    options.addOption(OptionBuilder.create(CMD_STOP));
+
+    OptionBuilder.withDescription("Show this help");
+    options.addOption(OptionBuilder.create(CMD_HELP));
+
+    OptionBuilder.withDescription("Port to use for restful API.");
+    OptionBuilder.hasOptionalArg();
+    OptionBuilder.withArgName("port number");
+    options.addOption(OptionBuilder.create(CMD_PORT));
+    return options;
+  }
 }

Modified: nutch/branches/2.x/src/java/org/apache/nutch/api/Params.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/api/Params.java?rev=1587275&r1=1587274&r2=1587275&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/api/Params.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/api/Params.java Mon Apr 14 18:33:48 2014
@@ -33,4 +33,6 @@ public interface Params {
   public static final String JOB_CMD_STOP = "stop";
   public static final String JOB_CMD_ABORT = "abort";
   public static final String JOB_CMD_GET = "get";
+  
+  public static final String TRUE = "true";
 }