You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2014/04/14 20:33:49 UTC
svn commit: r1587275 - in /nutch/branches/2.x: ./ conf/
src/java/org/apache/nutch/api/
Author: lewismc
Date: Mon Apr 14 18:33:48 2014
New Revision: 1587275
URL: http://svn.apache.org/r1587275
Log:
NUTCH-1731 Better cmd line parsing for NutchServer
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/conf/log4j.properties
nutch/branches/2.x/src/java/org/apache/nutch/api/AdminResource.java
nutch/branches/2.x/src/java/org/apache/nutch/api/JobManager.java
nutch/branches/2.x/src/java/org/apache/nutch/api/NutchApp.java
nutch/branches/2.x/src/java/org/apache/nutch/api/NutchServer.java
nutch/branches/2.x/src/java/org/apache/nutch/api/Params.java
Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1587275&r1=1587274&r2=1587275&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon Apr 14 18:33:48 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Current Development
+* NUTCH-1731 Better cmd line parsing for NutchServer (Fjodor Vershinin via lewismc)
+
* NUTCH-1751 Empty anchors should not index (Sertac TURKEL via lewismc)
* NUTCH-1733 parse-html to support HTML5 charset definitions (snagel)
Modified: nutch/branches/2.x/conf/log4j.properties
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/conf/log4j.properties?rev=1587275&r1=1587274&r2=1587275&view=diff
==============================================================================
--- nutch/branches/2.x/conf/log4j.properties (original)
+++ nutch/branches/2.x/conf/log4j.properties Mon Apr 14 18:33:48 2014
@@ -40,6 +40,7 @@ log4j.logger.org.apache.nutch.host.HostD
log4j.logger.org.apache.nutch.parse.ParserChecker=INFO,cmdstdout
log4j.logger.org.apache.nutch.indexer.IndexingFiltersChecker=INFO,cmdstdout
log4j.logger.org.apache.nutch.plugin.PluginRepository=WARN
+log4j.logger.org.apache.nutch.api.NutchServer=INFO,cmdstdout
log4j.logger.org.apache.nutch=INFO
log4j.logger.org.apache.hadoop=WARN
Modified: nutch/branches/2.x/src/java/org/apache/nutch/api/AdminResource.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/api/AdminResource.java?rev=1587275&r1=1587274&r2=1587275&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/api/AdminResource.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/api/AdminResource.java Mon Apr 14 18:33:48 2014
@@ -16,16 +16,21 @@
******************************************************************************/
package org.apache.nutch.api;
-import java.util.HashMap;
import java.util.Map;
+import org.apache.commons.lang.BooleanUtils;
+import org.apache.commons.lang.StringUtils;
import org.apache.nutch.api.JobStatus.State;
import org.restlet.resource.Get;
import org.restlet.resource.ServerResource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.google.common.collect.Maps;
+
public class AdminResource extends ServerResource {
+ private static final int SHUTDOWN_DELAY = 1000;
+
private static final Logger LOG = LoggerFactory.getLogger(AdminResource.class);
public static final String PATH = "admin";
@@ -33,41 +38,57 @@ public class AdminResource extends Serve
@Get("json")
public Object execute() throws Exception {
- String cmd = (String)getRequestAttributes().get(Params.CMD);
- if ("status".equalsIgnoreCase(cmd)) {
- // status
- Map<String,Object> res = new HashMap<String,Object>();
- res.put("started", NutchApp.started);
- Map<String,Object> jobs = new HashMap<String,Object>();
- jobs.put("all", NutchApp.jobMgr.list(null, State.ANY));
- jobs.put("running", NutchApp.jobMgr.list(null, State.RUNNING));
- res.put("jobs", jobs);
- res.put("confs", NutchApp.confMgr.list());
- return res;
- } else if ("stop".equalsIgnoreCase(cmd)) {
- // stop
- if (NutchApp.server.canStop()) {
- Thread t = new Thread() {
- public void run() {
- try {
- Thread.sleep(1000);
- NutchApp.server.stop(false);
- LOG.info("Service stopped.");
- } catch (Exception e) {
- LOG.error("Error stopping", e);
- };
- }
- };
- t.setDaemon(true);
- t.start();
- LOG.info("Service shutting down...");
- return "stopping";
- } else {
- LOG.info("Command 'stop' denied due to unfinished jobs");
- return "can't stop now";
- }
- } else {
- return "Unknown command " + cmd;
+ Map<String, Object> attributes = getRequestAttributes();
+ String cmd = (String) attributes.get(Params.CMD);
+
+ if (StringUtils.equalsIgnoreCase("status", cmd)) {
+ return getNutchStatus();
+ }
+
+ if (StringUtils.equalsIgnoreCase("stop", cmd)) {
+ boolean force = BooleanUtils.toBoolean(getQuery().getFirstValue(Params.FORCE));
+ return stopServer(force);
+ }
+ return "Unknown command " + cmd;
+ }
+
+ private String stopServer(boolean force) throws Exception {
+ if (!canStopServer(force)) {
+ LOG.info("Command 'stop' denied due to unfinished jobs");
+ return "can't stop now";
}
+
+ Thread t = new Thread() {
+ public void run() {
+ try {
+ Thread.sleep(SHUTDOWN_DELAY);
+ NutchApp.server.stop(false);
+ LOG.info("Service stopped.");
+ } catch (Exception e) {
+ LOG.error("Error stopping", e);
+ }
+ }
+ };
+ t.setDaemon(true);
+ t.start();
+ LOG.info("Service shutting down...");
+ return "stopping";
+ }
+
+ private boolean canStopServer(boolean force) throws Exception {
+ return force || NutchApp.server.canStop();
+ }
+
+ private Map<String, Object> getNutchStatus() throws Exception {
+ Map<String, Object> res = Maps.newHashMap();
+ res.put("started", NutchApp.started);
+
+ Map<String, Object> jobs = Maps.newHashMap();
+ jobs.put("all", NutchApp.jobMgr.list(null, State.ANY));
+ jobs.put("running", NutchApp.jobMgr.list(null, State.RUNNING));
+ res.put("jobs", jobs);
+ res.put("confs", NutchApp.confMgr.list());
+
+ return res;
}
}
Modified: nutch/branches/2.x/src/java/org/apache/nutch/api/JobManager.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/api/JobManager.java?rev=1587275&r1=1587274&r2=1587275&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/api/JobManager.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/api/JobManager.java Mon Apr 14 18:33:48 2014
@@ -18,8 +18,6 @@ package org.apache.nutch.api;
import java.util.List;
import java.util.Map;
-import java.util.Set;
-
import org.apache.nutch.api.JobStatus.State;
public interface JobManager {
Modified: nutch/branches/2.x/src/java/org/apache/nutch/api/NutchApp.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/api/NutchApp.java?rev=1587275&r1=1587274&r2=1587275&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/api/NutchApp.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/api/NutchApp.java Mon Apr 14 18:33:48 2014
@@ -16,6 +16,8 @@
******************************************************************************/
package org.apache.nutch.api;
+import java.util.logging.Level;
+
import org.apache.nutch.api.impl.RAMConfManager;
import org.apache.nutch.api.impl.RAMJobManager;
import org.restlet.Application;
@@ -41,7 +43,7 @@ public class NutchApp extends Applicatio
getTunnelService().setEnabled(true);
getTunnelService().setExtensionsTunnel(true);
Router router = new Router(getContext());
- //router.getLogger().setLevel(Level.FINEST);
+ router.getLogger().setLevel(Level.FINEST);
// configs
router.attach("/", APIInfoResource.class);
router.attach("/" + AdminResource.PATH, AdminResource.class);
Modified: nutch/branches/2.x/src/java/org/apache/nutch/api/NutchServer.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/api/NutchServer.java?rev=1587275&r1=1587274&r2=1587275&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/api/NutchServer.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/api/NutchServer.java Mon Apr 14 18:33:48 2014
@@ -19,77 +19,195 @@ package org.apache.nutch.api;
import java.util.List;
import java.util.logging.Level;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.PosixParser;
+import org.apache.commons.lang.StringUtils;
import org.apache.nutch.api.JobStatus.State;
import org.restlet.Component;
import org.restlet.data.Protocol;
+import org.restlet.data.Reference;
+import org.restlet.representation.Representation;
+import org.restlet.resource.ClientResource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class NutchServer {
private static final Logger LOG = LoggerFactory.getLogger(NutchServer.class);
+
+ private static final String LOCALHOST = "localhost";
+ private static final String DEFAULT_LOG_LEVEL = "INFO";
+ private static final Integer DEFAULT_PORT = 8081;
+
+ private static String logLevel = DEFAULT_LOG_LEVEL;
+ private static Integer port = DEFAULT_PORT;
+
+ private static final String CMD_HELP = "help";
+ private static final String CMD_STOP = "stop";
+ private static final String CMD_PORT = "port";
+ private static final String CMD_LOG_LEVEL = "log";
+
private Component component;
private NutchApp app;
- private int port;
private boolean running;
-
- public NutchServer(int port) {
- this.port = port;
- // Create a new Component.
+
+ /**
+ * Public constructor. It takes no arguments: the port and the logging
+ * granularity are read from static fields that
+ * {@link org.apache.nutch.api.NutchServer#main(String[])} sets from the
+ * command line. When those options are not given, the port defaults to 8081
+ * and the logging level to 'INFO'.
+ */
+ public NutchServer() {
+ // Create a new Component.
component = new Component();
- //component.getLogger().setLevel(Level.FINEST);
-
- // Add a new HTTP server listening on port 8182.
- component.getServers().add(Protocol.HTTP, port);
-
- // Attach the application.
+ component.getLogger().setLevel(Level.parse(logLevel));
+
+ // Add a new HTTP server listening on defined port.
+ component.getServers().add(Protocol.HTTP, port);
+
+ // Attach the application.
app = new NutchApp();
- component.getDefaultHost().attach("/nutch", app);
+ component.getDefaultHost().attach("/nutch", app);
+
NutchApp.server = this;
}
-
+
+ /**
+ * Convenience method to determine whether a Nutch server is running.
+ *
+ * @return true if a server instance is running.
+ */
public boolean isRunning() {
return running;
}
-
+
+ /**
+ * Starts the Nutch server, logging the port and logging level as it does so.
+ *
+ * @throws Exception
+ */
public void start() throws Exception {
- LOG.info("Starting NutchServer on port " + port + "...");
+ LOG.info("Starting NutchServer on port: {} with logging level: {} ...",
+ port, logLevel);
component.start();
- LOG.info("Started NutchServer on port " + port);
+ LOG.info("Started NutchServer on port {}", port);
running = true;
NutchApp.started = System.currentTimeMillis();
}
-
+
+ /**
+ * Safety and convenience method to determine whether or not it is safe to
+ * shut down the server. We make this assertion by consulting the
+ * {@link org.apache.nutch.api.NutchApp#jobMgr} for a list of jobs with
+ * {@link org.apache.nutch.api.JobStatus#state} equal to 'RUNNING'.
+ *
+ * @return true if there are no jobs running or false if there are jobs with
+ * running state.
+ * @throws Exception
+ */
public boolean canStop() throws Exception {
List<JobStatus> jobs = NutchApp.jobMgr.list(null, State.RUNNING);
- if (!jobs.isEmpty()) {
- return false;
- }
- return true;
+ return jobs.isEmpty();
}
-
+
+ /**
+ * Stop the Nutch server.
+ *
+ * @param force
+ * if true, stop the server even while jobs are still running.
+ * @return true if no server is running or if the shutdown was successful.
+ * Return false if there are running jobs and the force switch has not
+ * been activated.
+ * @throws Exception
+ */
public boolean stop(boolean force) throws Exception {
- if (!running) {
+ if (!NutchApp.server.running) {
return true;
}
- if (!canStop() && !force) {
+ if (!NutchApp.server.canStop() && !force) {
LOG.warn("Running jobs - can't stop now.");
return false;
}
- LOG.info("Stopping NutchServer on port " + port + "...");
+ LOG.info("Stopping NutchServer on port {}...", port);
component.stop();
- LOG.info("Stopped NutchServer on port " + port);
+ LOG.info("Stopped NutchServer on port {}", port);
running = false;
return true;
}
- public static void main(String[] args) throws Exception {
- if (args.length == 0) {
- System.err.println("Usage: NutchServer <port>");
- System.exit(-1);
+ public static void main(String[] args) throws Exception {
+ CommandLineParser parser = new PosixParser();
+ Options options = createOptions();
+ CommandLine commandLine = parser.parse(options, args);
+
+ if (commandLine.hasOption(CMD_HELP)) {
+ HelpFormatter formatter = new HelpFormatter();
+ formatter.printHelp("NutchServer", options, true);
+ return;
+ }
+
+ if (commandLine.hasOption(CMD_LOG_LEVEL)) {
+ logLevel = commandLine.getOptionValue(CMD_LOG_LEVEL);
+ }
+
+ if (commandLine.hasOption(CMD_PORT)) {
+ port = Integer.parseInt(commandLine.getOptionValue(CMD_PORT));
+ }
+
+ if (commandLine.hasOption(CMD_STOP)) {
+ String stopParameter = commandLine.getOptionValue(CMD_STOP);
+ boolean force = StringUtils.equals(Params.FORCE, stopParameter);
+ stopRemoteServer(force);
+ return;
}
- int port = Integer.parseInt(args[0]);
- NutchServer server = new NutchServer(port);
+
+ startServer();
+ }
+
+ private static void startServer() throws Exception {
+ NutchServer server = new NutchServer();
server.start();
}
+
+ private static void stopRemoteServer(boolean force) throws Exception {
+ Reference reference = new Reference(Protocol.HTTP, LOCALHOST, port);
+ reference.setPath("/nutch/admin/stop");
+
+ if (force) {
+ reference.addQueryParameter(Params.FORCE, Params.TRUE);
+ }
+
+ ClientResource clientResource = new ClientResource(reference);
+ Representation response = clientResource.get();
+ LOG.info("Server response: {} ", response.getText());
+ }
+
+ private static Options createOptions() {
+ Options options = new Options();
+ OptionBuilder.hasArg();
+ OptionBuilder.withArgName("logging level");
+ OptionBuilder.withDescription("Select a logging level for the NutchServer: \n"
+ + "ALL|CONFIG|FINER|FINEST|INFO|OFF|SEVERE|WARNING");
+ options.addOption(OptionBuilder.create(CMD_LOG_LEVEL));
+
+ OptionBuilder.withDescription("Stop running NutchServer. "
+ + "true value forces the Server to stop despite running jobs e.g. kills the tasks ");
+ OptionBuilder.hasOptionalArg();
+ OptionBuilder.withArgName("force");
+ options.addOption(OptionBuilder.create(CMD_STOP));
+
+ OptionBuilder.withDescription("Show this help");
+ options.addOption(OptionBuilder.create(CMD_HELP));
+
+ OptionBuilder.withDescription("Port to use for restful API.");
+ OptionBuilder.hasOptionalArg();
+ OptionBuilder.withArgName("port number");
+ options.addOption(OptionBuilder.create(CMD_PORT));
+ return options;
+ }
}
Modified: nutch/branches/2.x/src/java/org/apache/nutch/api/Params.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/api/Params.java?rev=1587275&r1=1587274&r2=1587275&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/api/Params.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/api/Params.java Mon Apr 14 18:33:48 2014
@@ -33,4 +33,6 @@ public interface Params {
public static final String JOB_CMD_STOP = "stop";
public static final String JOB_CMD_ABORT = "abort";
public static final String JOB_CMD_GET = "get";
+
+ public static final String TRUE = "true";
}