You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2010/10/28 11:23:57 UTC
svn commit: r1028235 - in /nutch/trunk: ./ ivy/
src/java/org/apache/nutch/api/ src/java/org/apache/nutch/api/impl/
src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/fetcher/
src/java/org/apache/nutch/indexer/solr/ src/java/org/apache/nutch/par...
Author: ab
Date: Thu Oct 28 09:23:55 2010
New Revision: 1028235
URL: http://svn.apache.org/viewvc?rev=1028235&view=rev
Log:
NUTCH-880 REST API for Nutch.
Added:
nutch/trunk/src/java/org/apache/nutch/api/
nutch/trunk/src/java/org/apache/nutch/api/APIInfoResource.java (with props)
nutch/trunk/src/java/org/apache/nutch/api/ConfManager.java (with props)
nutch/trunk/src/java/org/apache/nutch/api/ConfResource.java (with props)
nutch/trunk/src/java/org/apache/nutch/api/JobManager.java (with props)
nutch/trunk/src/java/org/apache/nutch/api/JobResource.java (with props)
nutch/trunk/src/java/org/apache/nutch/api/JobStatus.java (with props)
nutch/trunk/src/java/org/apache/nutch/api/NutchApp.java (with props)
nutch/trunk/src/java/org/apache/nutch/api/NutchServer.java (with props)
nutch/trunk/src/java/org/apache/nutch/api/Params.java (with props)
nutch/trunk/src/java/org/apache/nutch/api/impl/
nutch/trunk/src/java/org/apache/nutch/api/impl/RAMConfManager.java (with props)
nutch/trunk/src/java/org/apache/nutch/api/impl/RAMJobManager.java (with props)
nutch/trunk/src/java/org/apache/nutch/util/NutchTool.java (with props)
nutch/trunk/src/test/org/apache/nutch/api/
nutch/trunk/src/test/org/apache/nutch/api/TestAPI.java (with props)
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/ivy/ivy.xml
nutch/trunk/ivy/ivysettings.xml
nutch/trunk/src/java/org/apache/nutch/crawl/DbUpdaterJob.java
nutch/trunk/src/java/org/apache/nutch/crawl/GeneratorJob.java
nutch/trunk/src/java/org/apache/nutch/crawl/InjectorJob.java
nutch/trunk/src/java/org/apache/nutch/crawl/WebTableReader.java
nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherJob.java
nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java
nutch/trunk/src/java/org/apache/nutch/parse/ParserJob.java
nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1028235&r1=1028234&r2=1028235&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Oct 28 09:23:55 2010
@@ -80,6 +80,8 @@ Release 2.0 - Current Development
* NUTCH-907 DataStore API doesn't support multiple storage areas for multiple disjoint crawls (Sertan Alkan via ab)
+* NUTCH-880 REST API for Nutch (ab)
+
Release 1.1 - 2010-06-06
Modified: nutch/trunk/ivy/ivy.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/ivy/ivy.xml?rev=1028235&r1=1028234&r2=1028235&view=diff
==============================================================================
--- nutch/trunk/ivy/ivy.xml (original)
+++ nutch/trunk/ivy/ivy.xml Thu Oct 28 09:23:55 2010
@@ -110,12 +110,18 @@
<dependency org="org.hsqldb" name="hsqldb" rev="2.0.0" conf="*->default"/>
<dependency org="org.jdom" name="jdom" rev="1.1" conf="test->default"/>
- <dependency org="org.apache.gora" name="gora-sql" rev="0.1" conf="test->compile">
+ <dependency org="org.apache.gora" name="gora-sql" rev="0.1" conf="*->compile">
<exclude org="com.sun.jdmk"/>
<exclude org="com.sun.jmx"/>
<exclude org="javax.jms"/>
</dependency>
-
+ <dependency org="org.apache.gora" name="gora-solr" rev="0.1" conf="*->compile">
+ <exclude org="com.sun.jdmk"/>
+ <exclude org="com.sun.jmx"/>
+ <exclude org="javax.jms"/>
+ </dependency>
+ <dependency org="org.restlet.jse" name="org.restlet" rev="2.0.1" conf="*->default"/>
+ <dependency org="org.restlet.jse" name="org.restlet.ext.jackson" rev="2.0.1" conf="*->default"/>
</dependencies>
</ivy-module>
Modified: nutch/trunk/ivy/ivysettings.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/ivy/ivysettings.xml?rev=1028235&r1=1028234&r2=1028235&view=diff
==============================================================================
--- nutch/trunk/ivy/ivysettings.xml (original)
+++ nutch/trunk/ivy/ivysettings.xml Thu Oct 28 09:23:55 2010
@@ -53,6 +53,11 @@
pattern="${maven2.pattern.ext}"
m2compatible="true"
/>
+ <ibiblio name="restlet"
+ root="http://maven.restlet.org"
+ pattern="${maven2.pattern.ext}"
+ m2compatible="true"
+ />
<chain name="default" dual="true">
<resolver ref="local"/>
<resolver ref="maven2"/>
@@ -67,6 +72,9 @@
<resolver ref="maven2"/>
<resolver ref="apache-snapshot"/>
</chain>
+ <chain name="restlet">
+ <resolver ref="restlet"/>
+ </chain>
</resolvers>
<modules>
@@ -76,5 +84,7 @@
-->
<module organisation="org.apache.nutch" name=".*" resolver="internal"/>
<module organisation="org.apache.gora" name=".*" resolver="internal"/>
+ <module organisation="org.restlet" name=".*" resolver="restlet"/>
+ <module organisation="org.restlet.jse" name=".*" resolver="restlet"/>
</modules>
</ivysettings>
Added: nutch/trunk/src/java/org/apache/nutch/api/APIInfoResource.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/api/APIInfoResource.java?rev=1028235&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/api/APIInfoResource.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/api/APIInfoResource.java Thu Oct 28 09:23:55 2010
@@ -0,0 +1,22 @@
+package org.apache.nutch.api;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.restlet.resource.Get;
+import org.restlet.resource.ServerResource;
+
+/** Root REST resource: lists the available API endpoints as {path, description} pairs. */
+public class APIInfoResource extends ServerResource {
+ // Static endpoint registry, populated once at class-load time.
+ private static final List<String[]> info = new ArrayList<String[]>();
+
+ static {
+ info.add(new String[]{ConfResource.PATH, ConfResource.DESCR});
+ info.add(new String[]{JobResource.PATH, JobResource.DESCR});
+ }
+
+ /** Returns the endpoint registry (serialized to JSON by Restlet). */
+ @Get("json")
+ public List<String[]> retrieve() throws IOException {
+ return info;
+ }
+}
Propchange: nutch/trunk/src/java/org/apache/nutch/api/APIInfoResource.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/api/ConfManager.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/api/ConfManager.java?rev=1028235&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/api/ConfManager.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/api/ConfManager.java Thu Oct 28 09:23:55 2010
@@ -0,0 +1,21 @@
+package org.apache.nutch.api;
+
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Manages named Nutch {@link Configuration} instances used by API-submitted jobs.
+ */
+public interface ConfManager {
+
+ /** Returns the ids of all known configurations. */
+ public Set<String> list() throws Exception;
+
+ /** Returns the configuration with the given id, or null if unknown. */
+ public Configuration get(String confId);
+
+ /** Returns the configuration as a property map, or null if unknown. */
+ public Map<String,String> getAsMap(String confId);
+
+ /** Removes the configuration with the given id. */
+ public void delete(String confId);
+
+ /** Creates a configuration; if the id already exists, fails unless force is true. */
+ public void create(String confId, Map<String,String> props, boolean force) throws Exception;
+
+ /** Sets a single property on an existing configuration. */
+ public void setProperty(String confId, String propName, String propValue) throws Exception;
+}
Propchange: nutch/trunk/src/java/org/apache/nutch/api/ConfManager.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/api/ConfResource.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/api/ConfResource.java?rev=1028235&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/api/ConfResource.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/api/ConfResource.java Thu Oct 28 09:23:55 2010
@@ -0,0 +1,82 @@
+package org.apache.nutch.api;
+
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.restlet.data.Form;
+import org.restlet.resource.Delete;
+import org.restlet.resource.Get;
+import org.restlet.resource.Post;
+import org.restlet.resource.Put;
+import org.restlet.resource.ServerResource;
+
+/** REST resource exposing CRUD operations on named configurations via {@link ConfManager}. */
+public class ConfResource extends ServerResource {
+
+ public static final String PATH = "confs";
+ public static final String DESCR = "Configuration manager";
+ public static final String DEFAULT_CONF = "default";
+
+ // Static: Restlet instantiates a fresh ServerResource per request, so an
+ // instance-level counter would restart on every call and generate colliding ids.
+ private static final AtomicInteger seqId = new AtomicInteger();
+
+ /**
+  * GET: no id - list config ids; id only - config as a map; id + property
+  * name - that single property's value (null if the config is unknown).
+  */
+ @Get("json")
+ public Object retrieve() throws Exception {
+ String id = (String)getRequestAttributes().get(Params.CONF_ID);
+ if (id == null) {
+ return NutchApp.confMgr.list();
+ } else {
+ String prop = (String)getRequestAttributes().get(Params.PROP_NAME);
+ if (prop == null) {
+ return NutchApp.confMgr.getAsMap(id);
+ } else {
+ Map<String,String> cfg = NutchApp.confMgr.getAsMap(id);
+ if (cfg == null) {
+ return null;
+ } else {
+ return cfg.get(prop);
+ }
+ }
+ }
+ }
+
+ /**
+  * PUT: creates a configuration from the supplied property overrides;
+  * generates a sequential id when none is given. Returns the config id.
+  */
+ @SuppressWarnings("unchecked")
+ @Put("json")
+ public String create(Map<String,Object> args) throws Exception {
+ String id = (String)args.get(Params.CONF_ID);
+ if (id == null) {
+ id = String.valueOf(seqId.incrementAndGet());
+ }
+ Map<String,String> props = (Map<String,String>)args.get(Params.PROPS);
+ Boolean force = (Boolean)args.get(Params.FORCE);
+ boolean f = force != null ? force : false;
+ NutchApp.confMgr.create(id, props, f);
+ return id;
+ }
+
+ /** POST: sets a single property; id and property name may come from URL or body. */
+ @Post("json")
+ public void update(Map<String,Object> args) throws Exception {
+ String id = (String)args.get(Params.CONF_ID);
+ if (id == null) id = (String)getRequestAttributes().get(Params.CONF_ID);
+ if (id == null) {
+ throw new Exception("Missing config id");
+ }
+ String prop = (String)args.get(Params.PROP_NAME);
+ if (prop == null) prop = (String)getRequestAttributes().get(Params.PROP_NAME);
+ if (prop == null) {
+ throw new Exception("Missing property name prop");
+ }
+ String value = (String)args.get(Params.PROP_VALUE);
+ if (value == null) {
+ throw new Exception("Missing property value");
+ }
+ NutchApp.confMgr.setProperty(id, prop, value);
+ }
+
+ /** DELETE: removes the configuration named in the URL. */
+ @Delete
+ public void remove() throws Exception {
+ String id = (String)getRequestAttributes().get(Params.CONF_ID);
+ if (id == null) {
+ throw new Exception("Missing config id");
+ }
+ NutchApp.confMgr.delete(id);
+ }
+}
Propchange: nutch/trunk/src/java/org/apache/nutch/api/ConfResource.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/api/JobManager.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/api/JobManager.java?rev=1028235&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/api/JobManager.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/api/JobManager.java Thu Oct 28 09:23:55 2010
@@ -0,0 +1,22 @@
+package org.apache.nutch.api;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.nutch.api.JobStatus.State;
+
+/** Manages the lifecycle of Nutch jobs submitted through the REST API. */
+public interface JobManager {
+
+ // CLASS runs an arbitrary NutchTool implementation supplied in the job args.
+ public static enum JobType {INJECT, GENERATE, FETCH, PARSE, UPDATEDB, INDEX, CRAWL, READDB, CLASS};
+
+ /** Lists jobs of a crawl, filtered by state (State.ANY matches all). */
+ public List<JobStatus> list(String crawlId, State state) throws Exception;
+
+ /** Returns details of a single job as a property map. */
+ public Map<String,String> get(String crawlId, String id) throws Exception;
+
+ /** Creates and schedules a new job; returns the new job id. */
+ public String create(String crawlId, JobType type, String confId, Object... args) throws Exception;
+
+ /** Requests a forced abort of a running job; returns true on success. */
+ public boolean abort(String crawlId, String id) throws Exception;
+
+ /** Requests a graceful stop of a running job; returns true on success. */
+ public boolean stop(String crawlId, String id) throws Exception;
+}
Propchange: nutch/trunk/src/java/org/apache/nutch/api/JobManager.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/api/JobResource.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/api/JobResource.java?rev=1028235&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/api/JobResource.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/api/JobResource.java Thu Oct 28 09:23:55 2010
@@ -0,0 +1,55 @@
+package org.apache.nutch.api;
+
+import java.util.Map;
+
+import org.apache.nutch.api.JobManager.JobType;
+import org.apache.nutch.api.JobStatus.State;
+import org.restlet.resource.Get;
+import org.restlet.resource.Put;
+import org.restlet.resource.ServerResource;
+
+/** REST resource for listing, inspecting, controlling and creating jobs. */
+public class JobResource extends ServerResource {
+ public static final String PATH = "jobs";
+ public static final String DESCR = "Job manager";
+
+ /**
+  * GET: without a job id lists all jobs of the crawl; with a job id returns
+  * job details, or executes the optional stop / abort / get command.
+  */
+ @Get("json")
+ public Object retrieve() throws Exception {
+ String cid = (String)getRequestAttributes().get(Params.CRAWL_ID);
+ String jid = (String)getRequestAttributes().get(Params.JOB_ID);
+ if (jid == null) {
+ return NutchApp.jobMgr.list(cid, State.ANY);
+ } else {
+ // handle stop / abort / get
+ String cmd = (String)getRequestAttributes().get(Params.CMD);
+ if (cmd == null) {
+ return NutchApp.jobMgr.get(cid, jid);
+ }
+ if (cmd.equals(Params.JOB_CMD_STOP)) {
+ // "stop" asks for a graceful stop, as opposed to the forced "abort".
+ return NutchApp.jobMgr.stop(cid, jid);
+ } else if (cmd.equals(Params.JOB_CMD_ABORT)) {
+ return NutchApp.jobMgr.abort(cid, jid);
+ } else if (cmd.equals(Params.JOB_CMD_GET)) {
+ return NutchApp.jobMgr.get(cid, jid);
+ } else {
+ throw new Exception("Unknown command: " + cmd);
+ }
+ }
+ }
+
+ /**
+  * PUT: creates and schedules a new job; returns the new job id.
+  * Expected body keys: crawl id (String), job type (String), conf id
+  * (String), and optional tool arguments (Object[]).
+  */
+ @Put("json")
+ public Object create(Map<String,Object> args) throws Exception {
+ String cid = (String)args.get(Params.CRAWL_ID);
+ String typeString = (String)args.get(Params.JOB_TYPE);
+ JobType type = JobType.valueOf(typeString);
+ String confId = (String)args.get(Params.CONF_ID);
+ Object[] cmdArgs = (Object[])args.get(Params.ARGS);
+ String jobId = NutchApp.jobMgr.create(cid, type, confId, cmdArgs);
+ return jobId;
+ }
+}
Propchange: nutch/trunk/src/java/org/apache/nutch/api/JobResource.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/api/JobStatus.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/api/JobStatus.java?rev=1028235&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/api/JobStatus.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/api/JobStatus.java Thu Oct 28 09:23:55 2010
@@ -0,0 +1,27 @@
+package org.apache.nutch.api;
+
+import java.util.Map;
+
+import org.apache.nutch.api.JobManager.JobType;
+
+/**
+ * Mutable status bean for a job submitted through the API; public fields are
+ * serialized directly to JSON.
+ */
+public class JobStatus {
+ // ANY is a query wildcard only; it is never assigned as an actual job state.
+ public static enum State {IDLE, RUNNING, FINISHED, FAILED, KILLED, ANY};
+ public String id;
+ public JobType type;
+ public String confId;
+ public Object[] args;
+ // Tool-specific results, populated on successful completion (may stay null).
+ public Map<String,Object> result;
+ public State state;
+ // Human-readable progress or error message.
+ public String msg;
+
+ public JobStatus(String id, JobType type, String confId, Object[] args,
+ State state, String msg) {
+ this.id = id;
+ this.type = type;
+ this.confId = confId;
+ this.args = args;
+ this.state = state;
+ this.msg = msg;
+ }
+
+}
Propchange: nutch/trunk/src/java/org/apache/nutch/api/JobStatus.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/api/NutchApp.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/api/NutchApp.java?rev=1028235&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/api/NutchApp.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/api/NutchApp.java Thu Oct 28 09:23:55 2010
@@ -0,0 +1,45 @@
+package org.apache.nutch.api;
+
+import java.util.logging.Level;
+
+import org.apache.nutch.api.impl.RAMConfManager;
+import org.apache.nutch.api.impl.RAMJobManager;
+import org.restlet.Application;
+import org.restlet.Restlet;
+import org.restlet.routing.Router;
+
+/** Restlet application that wires URL routes to the API resources. */
+public class NutchApp extends Application {
+ // Shared managers used by all resource instances.
+ public static ConfManager confMgr;
+ public static JobManager jobMgr;
+
+ static {
+ confMgr = new RAMConfManager();
+ jobMgr = new RAMJobManager();
+ }
+
+ /**
+  * Creates a root Restlet that will receive all incoming calls.
+  */
+ @Override
+ public synchronized Restlet createInboundRoot() {
+ getTunnelService().setEnabled(true);
+ getTunnelService().setExtensionsTunnel(true);
+ Router router = new Router(getContext());
+ router.getLogger().setLevel(Level.FINEST);
+ // configs
+ router.attach("/", APIInfoResource.class);
+ router.attach("/" + ConfResource.PATH, ConfResource.class);
+ router.attach("/" + ConfResource.PATH + "/{"+ Params.CONF_ID +
+ "}", ConfResource.class);
+ router.attach("/" + ConfResource.PATH + "/{" + Params.CONF_ID +
+ "}/{" + Params.PROP_NAME + "}", ConfResource.class);
+ // jobs
+ router.attach("/" + JobResource.PATH, JobResource.class);
+ router.attach("/" + JobResource.PATH + "/{" + Params.JOB_ID + "}",
+ JobResource.class);
+ router.attach("/" + JobResource.PATH + "/{" + Params.JOB_ID + "}/{" +
+ Params.CMD + "}", JobResource.class);
+ return router;
+ }
+}
Propchange: nutch/trunk/src/java/org/apache/nutch/api/NutchApp.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/api/NutchServer.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/api/NutchServer.java?rev=1028235&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/api/NutchServer.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/api/NutchServer.java Thu Oct 28 09:23:55 2010
@@ -0,0 +1,70 @@
+package org.apache.nutch.api;
+
+import java.util.List;
+import java.util.logging.Level;
+
+import org.apache.nutch.api.JobStatus.State;
+import org.restlet.Component;
+import org.restlet.data.Protocol;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Standalone HTTP server hosting the Nutch REST API ({@link NutchApp})
+ * under the /nutch path.
+ */
+public class NutchServer {
+ private static final Logger LOG = LoggerFactory.getLogger(NutchServer.class);
+
+ private Component component;
+ private NutchApp app;
+ private int port;
+ private boolean running;
+
+ /** Configures (but does not start) a server listening on the given port. */
+ public NutchServer(int port) {
+ this.port = port;
+ // Create a new Component.
+ component = new Component();
+ component.getLogger().setLevel(Level.FINEST);
+
+ // Add a new HTTP server listening on the given port.
+ component.getServers().add(Protocol.HTTP, port);
+
+ // Attach the application.
+ app = new NutchApp();
+ component.getDefaultHost().attach("/nutch", app);
+ }
+
+ public boolean isRunning() {
+ return running;
+ }
+
+ /** Starts the HTTP server. */
+ public void start() throws Exception {
+ LOG.info("Starting NutchServer on port " + port + "...");
+ component.start();
+ LOG.info("Started NutchServer on port " + port);
+ running = true;
+ }
+
+ /**
+  * Stops the server. Unless force is true, refuses to stop while jobs are
+  * still running. Returns true if the server is (now) stopped.
+  */
+ public boolean stop(boolean force) throws Exception {
+ if (!running) {
+ return true;
+ }
+ List<JobStatus> jobs = NutchApp.jobMgr.list(null, State.RUNNING);
+ if (!jobs.isEmpty() && !force) {
+ LOG.warn("There are running jobs - NOT stopping at this time...");
+ return false;
+ }
+ LOG.info("Stopping NutchServer on port " + port + "...");
+ component.stop();
+ LOG.info("Stopped NutchServer on port " + port);
+ running = false;
+ return true;
+ }
+
+ public static void main(String[] args) throws Exception {
+ if (args.length == 0) {
+ System.err.println("Usage: NutchServer <port>");
+ System.exit(-1);
+ }
+ int port = Integer.parseInt(args[0]);
+ NutchServer server = new NutchServer(port);
+ server.start();
+ }
+}
Propchange: nutch/trunk/src/java/org/apache/nutch/api/NutchServer.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/api/Params.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/api/Params.java?rev=1028235&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/api/Params.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/api/Params.java Thu Oct 28 09:23:55 2010
@@ -0,0 +1,20 @@
+package org.apache.nutch.api;
+
+/** Names of URL attributes and JSON request-body keys used by the REST API. */
+public interface Params {
+
+ public static final String CONF_ID = "conf";
+ public static final String PROP_NAME = "prop";
+ public static final String PROP_VALUE = "value";
+ public static final String PROPS = "props";
+ public static final String CRAWL_ID = "crawl";
+ public static final String JOB_ID = "job";
+ public static final String JOB_TYPE = "type";
+ public static final String ARGS = "args";
+ public static final String CMD = "cmd";
+ public static final String FORCE = "force";
+
+
+ // Values accepted for the CMD attribute on job URLs.
+ public static final String JOB_CMD_STOP = "stop";
+ public static final String JOB_CMD_ABORT = "abort";
+ public static final String JOB_CMD_GET = "get";
+}
Propchange: nutch/trunk/src/java/org/apache/nutch/api/Params.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/api/impl/RAMConfManager.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/api/impl/RAMConfManager.java?rev=1028235&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/api/impl/RAMConfManager.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/api/impl/RAMConfManager.java Thu Oct 28 09:23:55 2010
@@ -0,0 +1,67 @@
+package org.apache.nutch.api.impl;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.api.ConfManager;
+import org.apache.nutch.api.ConfResource;
+import org.apache.nutch.util.NutchConfiguration;
+
+/**
+ * In-memory {@link ConfManager}: keeps named Configuration instances in a map.
+ * The instance is shared by all concurrent request threads of the REST server,
+ * so the map must be thread-safe.
+ */
+public class RAMConfManager implements ConfManager {
+ // ConcurrentHashMap: accessed concurrently from multiple request threads.
+ Map<String,Configuration> configs = new ConcurrentHashMap<String,Configuration>();
+
+ public RAMConfManager() {
+ configs.put(ConfResource.DEFAULT_CONF, NutchConfiguration.create());
+ }
+
+ /** Returns the ids of all known configurations. */
+ public Set<String> list() {
+ return configs.keySet();
+ }
+
+ /** Returns the configuration with the given id, or null if unknown. */
+ public Configuration get(String confId) {
+ return configs.get(confId);
+ }
+
+ /** Returns a sorted snapshot of the configuration's properties, or null if unknown. */
+ public Map<String,String> getAsMap(String confId) {
+ Configuration cfg = configs.get(confId);
+ if (cfg == null) return null;
+ Iterator<Entry<String,String>> it = cfg.iterator();
+ TreeMap<String,String> res = new TreeMap<String,String>();
+ while (it.hasNext()) {
+ Entry<String,String> e = it.next();
+ res.put(e.getKey(), e.getValue());
+ }
+ return res;
+ }
+
+ /** Creates a configuration; synchronized so the exists-check and put are atomic. */
+ public synchronized void create(String confId, Map<String,String> props, boolean force) throws Exception {
+ if (configs.containsKey(confId) && !force) {
+ throw new Exception("Config name '" + confId + "' already exists.");
+ }
+ Configuration conf = NutchConfiguration.create();
+ // apply overrides
+ if (props != null) {
+ for (Entry<String,String> e : props.entrySet()) {
+ conf.set(e.getKey(), e.getValue());
+ }
+ }
+ configs.put(confId, conf);
+ }
+
+ /** Sets a single property on an existing configuration. */
+ public void setProperty(String confId, String propName, String propValue) throws Exception {
+ if (!configs.containsKey(confId)) {
+ throw new Exception("Unknown configId '" + confId + "'");
+ }
+ Configuration conf = configs.get(confId);
+ conf.set(propName, propValue);
+ }
+
+ /** Removes the configuration with the given id. */
+ public void delete(String confId) {
+ configs.remove(confId);
+ }
+}
Propchange: nutch/trunk/src/java/org/apache/nutch/api/impl/RAMConfManager.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/api/impl/RAMJobManager.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/api/impl/RAMJobManager.java?rev=1028235&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/api/impl/RAMJobManager.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/api/impl/RAMJobManager.java Thu Oct 28 09:23:55 2010
@@ -0,0 +1,238 @@
+package org.apache.nutch.api.impl;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.nutch.api.JobManager;
+import org.apache.nutch.api.JobStatus;
+import org.apache.nutch.api.JobStatus.State;
+import org.apache.nutch.api.NutchApp;
+import org.apache.nutch.crawl.DbUpdaterJob;
+import org.apache.nutch.crawl.GeneratorJob;
+import org.apache.nutch.crawl.InjectorJob;
+import org.apache.nutch.crawl.WebTableReader;
+import org.apache.nutch.fetcher.FetcherJob;
+import org.apache.nutch.indexer.solr.SolrIndexerJob;
+import org.apache.nutch.metadata.Nutch;
+import org.apache.nutch.parse.ParserJob;
+import org.apache.nutch.util.NutchTool;
+
+/**
+ * In-memory {@link JobManager}: runs Nutch jobs on a thread pool and keeps
+ * bounded queues of running and finished job statuses.
+ */
+public class RAMJobManager {
+ // Bound for the work queue and for each of the status queues below.
+ int CAPACITY = 100;
+ ThreadPoolExecutor exec = new MyPoolExecutor(10, CAPACITY, 1, TimeUnit.HOURS,
+ new ArrayBlockingQueue<Runnable>(CAPACITY));
+
+ // Executor that maintains jobRunning / jobHistory around task execution.
+ private class MyPoolExecutor extends ThreadPoolExecutor {
+
+ public MyPoolExecutor(int corePoolSize, int maximumPoolSize,
+ long keepAliveTime, TimeUnit unit, BlockingQueue<Runnable> workQueue) {
+ super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue);
+ }
+
+ @Override
+ protected void beforeExecute(Thread t, Runnable r) {
+ // Record the job as running just before its worker thread starts it.
+ super.beforeExecute(t, r);
+ synchronized(jobRunning) {
+ jobRunning.offer(((JobWorker)r).jobStatus);
+ }
+ }
+
+ @Override
+ protected void afterExecute(Runnable r, Throwable t) {
+ super.afterExecute(r, t);
+ synchronized(jobRunning) {
+ jobRunning.remove(((JobWorker)r).jobStatus);
+ }
+ // Move the status into history; evict the oldest entry when full.
+ JobStatus status = ((JobWorker)r).jobStatus;
+ synchronized(jobHistory) {
+ if (!jobHistory.offer(status)) {
+ jobHistory.poll();
+ jobHistory.add(status);
+ }
+ }
+ }
+ }
+
+ // Bounded status queues: completed jobs and currently-running jobs.
+ ArrayBlockingQueue<JobStatus> jobHistory = new ArrayBlockingQueue<JobStatus>(CAPACITY);
+ ArrayBlockingQueue<JobStatus> jobRunning = new ArrayBlockingQueue<JobStatus>(CAPACITY);
+
+ // Maps job types to the NutchTool implementations that execute them;
+ // CRAWL and CLASS are intentionally absent (CLASS is resolved from args).
+ private static Map<JobType,Class<? extends NutchTool>> typeToClass = new HashMap<JobType,Class<? extends NutchTool>>();
+
+ static {
+ typeToClass.put(JobType.FETCH, FetcherJob.class);
+ typeToClass.put(JobType.GENERATE, GeneratorJob.class);
+ typeToClass.put(JobType.INDEX, SolrIndexerJob.class);
+ typeToClass.put(JobType.INJECT, InjectorJob.class);
+ typeToClass.put(JobType.PARSE, ParserJob.class);
+ typeToClass.put(JobType.UPDATEDB, DbUpdaterJob.class);
+ typeToClass.put(JobType.READDB, WebTableReader.class);
+ }
+
+ // Appends a finished status to history, evicting the oldest entry when full.
+ private void addFinishedStatus(JobStatus status) {
+ synchronized(jobHistory) {
+ if (!jobHistory.offer(status)) {
+ jobHistory.poll();
+ jobHistory.add(status);
+ }
+ }
+ }
+
+ // NOTE(review): crawlId is currently ignored here — all jobs are listed
+ // regardless of crawl. Confirm whether per-crawl filtering is intended.
+ @Override
+ public List<JobStatus> list(String crawlId, State state) throws Exception {
+ List<JobStatus> res = new ArrayList<JobStatus>();
+ if (state == null) state = State.ANY;
+ switch(state) {
+ case ANY:
+ res.addAll(jobHistory);
+ /* FALLTHROUGH */
+ case RUNNING:
+ case IDLE:
+ res.addAll(jobRunning);
+ break;
+ default:
+ // NOTE(review): FINISHED/FAILED/KILLED return the whole history without
+ // filtering by state — confirm this coarse behavior is intended.
+ res.addAll(jobHistory);
+ }
+ return res;
+ }
+
+ // Not implemented yet: always returns null.
+ @Override
+ public Map<String, String> get(String crawlId, String jobId) throws Exception {
+ return null;
+ }
+
+ /** Creates a worker for the job and submits it to the pool; returns the job id. */
+ @Override
+ public String create(String crawlId, JobType type, String confId, Object... args) throws Exception {
+ JobWorker worker = new JobWorker(crawlId, type, confId, args);
+ String id = worker.getId();
+ exec.execute(worker);
+ // Drop any cancelled tasks still sitting in the work queue.
+ exec.purge();
+ return id;
+ }
+
+ // Not implemented yet: always reports failure to abort.
+ @Override
+ public boolean abort(String crawlId, String id) throws Exception {
+ return false;
+ }
+
+ // Not implemented yet: always reports failure to stop.
+ @Override
+ public boolean stop(String crawlId, String id) throws Exception {
+ return false;
+ }
+
+ /**
+  * Runnable that drives one NutchTool through prepare / per-job execution /
+  * finish, updating its JobStatus and a coarse progress estimate as it goes.
+  */
+ private class JobWorker implements Runnable {
+ String id;
+ JobType type;
+ String confId;
+ NutchTool tool;
+ Object[] args;
+ // Coarse overall progress in [0,1]; see run() for the phase breakdown.
+ float progress = 0f;
+ // The Hadoop job currently executing, or null outside the job loop.
+ Job currentJob = null;
+ JobStatus jobStatus;
+
+ JobWorker(String crawlId, JobType type, String confId, Object... args) throws Exception {
+ Configuration conf = NutchApp.confMgr.get(confId);
+ // clone it - we are going to modify it
+ if (conf == null) {
+ throw new Exception("Unknown confId " + confId);
+ }
+ this.id = confId + "-" + type + "-" + hashCode();
+ this.type = type;
+ this.confId = confId;
+ this.args = args;
+ conf = new Configuration(conf);
+ if (crawlId != null) {
+ conf.set(Nutch.CRAWL_ID_KEY, crawlId);
+ this.id = crawlId + "-" + this.id;
+ }
+ Class<? extends NutchTool> clz = typeToClass.get(type);
+ if (clz == null) {
+ // No fixed mapping (e.g. JobType.CLASS): first arg names the tool class.
+ clz = (Class<? extends NutchTool>)args[0];
+ }
+ tool = ReflectionUtils.newInstance(clz, conf);
+ jobStatus = new JobStatus(id, type, confId, args, State.IDLE, "idle");
+ }
+
+ public String getId() {
+ return id;
+ }
+
+ public float getProgress() {
+ return progress;
+ }
+
+ public State getState() {
+ return jobStatus.state;
+ }
+
+ // Reused {0,0} result when no Hadoop job is currently running.
+ private final float[] noProgress = new float[2];
+
+ /** Returns {mapProgress, reduceProgress} of the current Hadoop job. */
+ public float[] getCurrentJobProgress() throws IOException {
+ if (currentJob == null) {
+ return noProgress;
+ }
+ float[] res = new float[2];
+ res[0] = currentJob.mapProgress();
+ res[1] = currentJob.reduceProgress();
+ return res;
+ }
+
+ public Map<String,Object> getResult() {
+ return jobStatus.result;
+ }
+
+ public String getStatus() {
+ return jobStatus.msg;
+ }
+
+ @Override
+ public void run() {
+ try {
+ // Progress phases: 0.0 prepare, 0.1 start of jobs, up to 0.9 after all
+ // jobs (0.8 spread evenly across them), 1.0 when finish() completes.
+ progress = 0f;
+ jobStatus.state = State.RUNNING;
+ jobStatus.msg = "prepare";
+ tool.prepare();
+ progress = 0.1f;
+ Job[] jobs = tool.createJobs(args);
+ float delta = 0.8f / jobs.length;
+ for (int i = 0; i < jobs.length; i++) {
+ currentJob = jobs[i];
+ jobStatus.msg = "job " + (i + 1) + "/" + jobs.length;
+ boolean success = jobs[i].waitForCompletion(true);
+ if (!success) {
+ throw new Exception("Job failed.");
+ }
+ jobStatus.msg = "postJob " + (i + 1);
+ tool.postJob(i, jobs[i]);
+ progress += delta;
+ }
+ currentJob = null;
+ progress = 0.9f;
+ jobStatus.msg = "finish";
+ Map<String,Object> res = tool.finish();
+ if (res != null) {
+ jobStatus.result = res;
+ }
+ progress = 1.0f;
+ jobStatus.state = State.FINISHED;
+ } catch (Exception e) {
+ // Record the failure; the message keeps the phase it failed in.
+ jobStatus.msg = "ERROR " + jobStatus.msg + ": " + e.toString();
+ jobStatus.state = State.FAILED;
+ }
+ }
+ }
+}
Propchange: nutch/trunk/src/java/org/apache/nutch/api/impl/RAMJobManager.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/DbUpdaterJob.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/DbUpdaterJob.java?rev=1028235&r1=1028234&r2=1028235&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/DbUpdaterJob.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/DbUpdaterJob.java Thu Oct 28 09:23:55 2010
@@ -2,6 +2,7 @@ package org.apache.nutch.crawl;
import java.util.Collection;
import java.util.HashSet;
+import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -17,10 +18,11 @@ import org.apache.nutch.storage.StorageU
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.NutchTool;
import org.apache.gora.mapreduce.StringComparator;
public class DbUpdaterJob extends Configured
-implements Tool {
+implements Tool, NutchTool {
public static final Logger LOG = LoggerFactory.getLogger(DbUpdaterJob.class);
@@ -51,10 +53,28 @@ implements Tool {
public DbUpdaterJob(Configuration conf) {
setConf(conf);
}
-
- private int updateTable() throws Exception {
- LOG.info("DbUpdaterJob: starting");
+
+ public Map<String,Object> prepare() throws Exception {
+ return null;
+ }
+
+ public Map<String,Object> postJob(int jobIndex, Job job) throws Exception {
+ return null;
+ }
+
+ public Map<String,Object> finish() throws Exception {
+ return null;
+ }
+
+ public Job[] createJobs(Object... args) throws Exception {
+ String crawlId = null;
+ if (args.length > 0) {
+ crawlId = (String)args[0];
+ }
Job job = new NutchJob(getConf(), "update-table");
+ if (crawlId != null) {
+ job.getConfiguration().set(Nutch.CRAWL_ID_KEY, crawlId);
+ }
//job.setBoolean(ALL, updateAll);
ScoringFilters scoringFilters = new ScoringFilters(getConf());
HashSet<WebPage.Field> fields = new HashSet<WebPage.Field>(FIELDS);
@@ -65,8 +85,13 @@ implements Tool {
StorageUtils.initMapperJob(job, fields, String.class,
NutchWritable.class, DbUpdateMapper.class);
StorageUtils.initReducerJob(job, DbUpdateReducer.class);
-
- boolean success = job.waitForCompletion(true);
+ return new Job[]{job};
+ }
+
+ private int updateTable(String batchId) throws Exception {
+ LOG.info("DbUpdaterJob: starting");
+ Job[] jobs = createJobs(new Object[]{batchId});
+ boolean success = jobs[0].waitForCompletion(true);
if (!success){
LOG.info("DbUpdaterJob: failed");
return -1;
@@ -76,10 +101,11 @@ implements Tool {
}
public int run(String[] args) throws Exception {
+ String crawlId = null;
if (args.length == 2 && "-crawlId".equals(args[0])) {
- getConf().set(Nutch.CRAWL_ID_KEY, args[1]);
+ crawlId = args[1];
}
- return updateTable();
+ return updateTable(crawlId);
}
public static void main(String[] args) throws Exception {
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/GeneratorJob.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/GeneratorJob.java?rev=1028235&r1=1028234&r2=1028235&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/GeneratorJob.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/GeneratorJob.java Thu Oct 28 09:23:55 2010
@@ -3,7 +3,9 @@ package org.apache.nutch.crawl;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.HashMap;
import java.util.HashSet;
+import java.util.Map;
import java.util.Random;
import java.util.Set;
@@ -23,8 +25,9 @@ import org.apache.nutch.storage.StorageU
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.NutchTool;
-public class GeneratorJob extends Configured implements Tool {
+public class GeneratorJob extends Configured implements Tool, NutchTool {
public static final String GENERATE_UPDATE_CRAWLDB = "generate.update.crawldb";
public static final String GENERATOR_MIN_SCORE = "generate.min.score";
public static final String GENERATOR_FILTER = "generate.filter";
@@ -121,21 +124,26 @@ public class GeneratorJob extends Config
setConf(conf);
}
- /**
- * Mark URLs ready for fetching.
- * @throws ClassNotFoundException
- * @throws InterruptedException
- * */
- public String generate(long topN, long curTime, boolean filter, boolean norm)
- throws Exception {
-
- LOG.info("GeneratorJob: Selecting best-scoring urls due for fetch.");
- LOG.info("GeneratorJob: starting");
- LOG.info("GeneratorJob: filtering: " + filter);
- if (topN != Long.MAX_VALUE) {
- LOG.info("GeneratorJob: topN: " + topN);
- }
+ public Map<String,Object> prepare() throws Exception {
+ return null;
+ }
+
+ public Map<String,Object> postJob(int jobIndex, Job job) throws Exception {
+ return null;
+ }
+
+ public Map<String,Object> finish() throws Exception {
+ HashMap<String,Object> res = new HashMap<String,Object>();
+ res.put(BATCH_ID, batchId);
+ return res;
+ }
+ public Job[] createJobs(Object... args) throws Exception {
+ // map to inverted subset due for fetch, sort by score
+ long topN = (Long)args[0];
+ long curTime = (Long)args[1];
+ boolean filter = (Boolean)args[2];
+ boolean norm = (Boolean)args[3];
// map to inverted subset due for fetch, sort by score
getConf().setLong(GENERATOR_CUR_TIME, curTime);
getConf().setLong(GENERATOR_TOP_N, topN);
@@ -161,9 +169,30 @@ public class GeneratorJob extends Config
StorageUtils.initMapperJob(job, FIELDS, SelectorEntry.class,
WebPage.class, GeneratorMapper.class, URLPartitioner.class, true);
StorageUtils.initReducerJob(job, GeneratorReducer.class);
+ return new Job[]{job};
+ }
+
+ private String batchId;
+
+ /**
+ * Mark URLs ready for fetching.
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ * */
+ public String generate(long topN, long curTime, boolean filter, boolean norm)
+ throws Exception {
- boolean success = job.waitForCompletion(true);
+ LOG.info("GeneratorJob: Selecting best-scoring urls due for fetch.");
+ LOG.info("GeneratorJob: starting");
+ LOG.info("GeneratorJob: filtering: " + filter);
+ if (topN != Long.MAX_VALUE) {
+ LOG.info("GeneratorJob: topN: " + topN);
+ }
+ Job[] jobs = createJobs(topN, curTime, filter, norm);
+ boolean success = jobs[0].waitForCompletion(true);
if (!success) return null;
+
+ batchId = getConf().get(BATCH_ID);
LOG.info("GeneratorJob: done");
LOG.info("GeneratorJob: generated batch id: " + batchId);
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/InjectorJob.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/InjectorJob.java?rev=1028235&r1=1028234&r2=1028235&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/InjectorJob.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/InjectorJob.java Thu Oct 28 09:23:55 2010
@@ -35,6 +35,7 @@ import org.apache.nutch.storage.StorageU
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.NutchTool;
import org.apache.nutch.util.TableUtil;
/** This class takes a flat file of URLs and adds them to the of pages to be
@@ -47,7 +48,7 @@ import org.apache.nutch.util.TableUtil;
* e.g. http://www.nutch.org/ \t nutch.score=10 \t nutch.fetchInterval=2592000 \t userType=open_source
**/
public class InjectorJob extends GoraMapper<String, WebPage, String, WebPage>
- implements Tool {
+ implements Tool, NutchTool {
public static final Logger LOG = LoggerFactory.getLogger(InjectorJob.class);
@@ -213,14 +214,24 @@ public class InjectorJob extends GoraMap
context.write(key, row);
}
+
+ public Map<String,Object> prepare() throws Exception {
+ return null;
+ }
+
+ public Map<String,Object> postJob(int jobIndex, Job job) throws Exception {
+ return null;
+ }
- public void inject(Path urlDir) throws Exception {
- LOG.info("InjectorJob: starting");
- LOG.info("InjectorJob: urlDir: " + urlDir);
-
+ public Map<String,Object> finish() throws Exception {
+ return null;
+ }
+
+ public Job[] createJobs(Object... args) throws Exception {
+ Job[] jobs = new Job[2];
getConf().setLong("injector.current.time", System.currentTimeMillis());
- Job job = new NutchJob(getConf(), "inject-p1 " + urlDir);
- FileInputFormat.addInputPath(job, urlDir);
+ Job job = new NutchJob(getConf(), "inject-p1 " + args[0]);
+ FileInputFormat.addInputPath(job, (Path)args[0]);
job.setMapperClass(UrlMapper.class);
job.setMapOutputKeyClass(String.class);
job.setMapOutputValueClass(WebPage.class);
@@ -232,11 +243,22 @@ public class InjectorJob extends GoraMap
job.setNumReduceTasks(0);
job.waitForCompletion(true);
- job = new NutchJob(getConf(), "inject-p2 " + urlDir);
+ job = new NutchJob(getConf(), "inject-p2 " + args[0]);
StorageUtils.initMapperJob(job, FIELDS, String.class,
WebPage.class, InjectorJob.class);
job.setNumReduceTasks(0);
- job.waitForCompletion(true);
+ jobs[1] = job;
+ return jobs;
+ }
+
+ public void inject(Path urlDir) throws Exception {
+ LOG.info("InjectorJob: starting");
+ LOG.info("InjectorJob: urlDir: " + urlDir);
+
+ Job[] jobs = createJobs(urlDir);
+ jobs[0].waitForCompletion(true);
+
+ jobs[1].waitForCompletion(true);
}
@Override
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/WebTableReader.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/WebTableReader.java?rev=1028235&r1=1028234&r2=1028235&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/WebTableReader.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/WebTableReader.java Thu Oct 28 09:23:55 2010
@@ -37,6 +37,7 @@ import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.Bytes;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.NutchTool;
import org.apache.nutch.util.StringUtil;
import org.apache.nutch.util.TableUtil;
import org.apache.gora.mapreduce.GoraMapper;
@@ -48,7 +49,7 @@ import org.apache.gora.store.DataStore;
* Displays information about the entries of the webtable
**/
-public class WebTableReader extends Configured implements Tool {
+public class WebTableReader extends Configured implements Tool, NutchTool {
public static final Logger LOG = LoggerFactory.getLogger(WebTableReader.class);
@@ -195,114 +196,15 @@ public class WebTableReader extends Conf
}
- public void processStatJob(boolean sort) throws IOException,
- ClassNotFoundException, InterruptedException {
+ public void processStatJob(boolean sort) throws Exception {
+ Job[] jobs = createJobs(sort);
+
if (LOG.isInfoEnabled()) {
LOG.info("WebTable statistics start");
}
-
- Path tmpFolder = new Path(getConf().get("mapred.temp.dir", ".")
- + "stat_tmp" + System.currentTimeMillis());
-
- Job job = new NutchJob(getConf(), "db_stats");
-
- job.getConfiguration().setBoolean("db.reader.stats.sort", sort);
-
- DataStore<String, WebPage> store = StorageUtils.createWebStore(job
- .getConfiguration(), String.class, WebPage.class);
- Query<String, WebPage> query = store.newQuery();
- query.setFields(WebPage._ALL_FIELDS);
-
- GoraMapper.initMapperJob(job, query, store, Text.class, LongWritable.class,
- WebTableStatMapper.class, null, true);
-
- job.setCombinerClass(WebTableStatCombiner.class);
- job.setReducerClass(WebTableStatReducer.class);
-
- FileOutputFormat.setOutputPath(job, tmpFolder);
-
- job.setOutputFormatClass(SequenceFileOutputFormat.class);
-
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(LongWritable.class);
-
- boolean success = job.waitForCompletion(true);
-
- FileSystem fileSystem = FileSystem.get(getConf());
-
- if (!success) {
- fileSystem.delete(tmpFolder, true);
- return;
- }
-
- Text key = new Text();
- LongWritable value = new LongWritable();
-
- SequenceFile.Reader[] readers = org.apache.hadoop.mapred.SequenceFileOutputFormat
- .getReaders(getConf(), tmpFolder);
-
- TreeMap<String, LongWritable> stats = new TreeMap<String, LongWritable>();
- for (int i = 0; i < readers.length; i++) {
- SequenceFile.Reader reader = readers[i];
- while (reader.next(key, value)) {
- String k = key.toString();
- LongWritable val = stats.get(k);
- if (val == null) {
- val = new LongWritable();
- if (k.equals("scx"))
- val.set(Long.MIN_VALUE);
- if (k.equals("scn"))
- val.set(Long.MAX_VALUE);
- stats.put(k, val);
- }
- if (k.equals("scx")) {
- if (val.get() < value.get())
- val.set(value.get());
- } else if (k.equals("scn")) {
- if (val.get() > value.get())
- val.set(value.get());
- } else {
- val.set(val.get() + value.get());
- }
- }
- reader.close();
- }
-
- if (LOG.isInfoEnabled()) {
- LOG.info("Statistics for WebTable: ");
- LongWritable totalCnt = stats.get("T");
- if (totalCnt==null)totalCnt=new LongWritable(0);
- stats.remove("T");
- LOG.info("TOTAL urls:\t" + totalCnt.get());
- for (Map.Entry<String, LongWritable> entry : stats.entrySet()) {
- String k = entry.getKey();
- LongWritable val = entry.getValue();
- if (k.equals("scn")) {
- LOG.info("min score:\t" + (float) (val.get() / 1000.0f));
- } else if (k.equals("scx")) {
- LOG.info("max score:\t" + (float) (val.get() / 1000.0f));
- } else if (k.equals("sct")) {
- LOG.info("avg score:\t"
- + (float) ((((double) val.get()) / totalCnt.get()) / 1000.0));
- } else if (k.startsWith("status")) {
- String[] st = k.split(" ");
- int code = Integer.parseInt(st[1]);
- if (st.length > 2)
- LOG.info(" " + st[2] + " :\t" + val);
- else
- LOG.info(st[0] + " " + code + " ("
- + CrawlStatus.getName((byte) code) + "):\t" + val);
- } else
- LOG.info(k + ":\t" + val);
- }
- }
- // removing the tmp folder
- fileSystem.delete(tmpFolder, true);
- if (LOG.isInfoEnabled()) {
- LOG.info("WebTable statistics: done");
- }
-
+ boolean success = jobs[0].waitForCompletion(true);
+ postJob(0, jobs[0]);
}
/** Prints out the entry to the standard out **/
@@ -577,4 +479,131 @@ public class WebTableReader extends Conf
}
}
+ @Override
+ public Map<String, Object> prepare() throws Exception {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ // for now handles only -stat
+ @Override
+ public Job[] createJobs(Object... args) throws Exception {
+ Path tmpFolder = new Path(getConf().get("mapred.temp.dir", ".")
+ + "stat_tmp" + System.currentTimeMillis());
+
+ Job job = new NutchJob(getConf(), "db_stats");
+
+ boolean sort = false;
+ if (args != null && args.length > 0) {
+ sort = (Boolean)args[0];
+ }
+ job.getConfiguration().setBoolean("db.reader.stats.sort", sort);
+
+ DataStore<String, WebPage> store = StorageUtils.createWebStore(job
+ .getConfiguration(), String.class, WebPage.class);
+ Query<String, WebPage> query = store.newQuery();
+ query.setFields(WebPage._ALL_FIELDS);
+
+ GoraMapper.initMapperJob(job, query, store, Text.class, LongWritable.class,
+ WebTableStatMapper.class, null, true);
+
+ job.setCombinerClass(WebTableStatCombiner.class);
+ job.setReducerClass(WebTableStatReducer.class);
+
+ FileOutputFormat.setOutputPath(job, tmpFolder);
+
+ job.setOutputFormatClass(SequenceFileOutputFormat.class);
+
+ job.setOutputKeyClass(Text.class);
+ job.setOutputValueClass(LongWritable.class);
+
+ return new Job[]{job};
+ }
+
+ @Override
+ public Map<String, Object> postJob(int jobIndex, Job job) throws Exception {
+ Path tmpFolder = FileOutputFormat.getOutputPath(job);
+
+ FileSystem fileSystem = FileSystem.get(getConf());
+
+ if (!job.isSuccessful()) {
+ fileSystem.delete(tmpFolder, true);
+ return null;
+ }
+
+ Text key = new Text();
+ LongWritable value = new LongWritable();
+
+ SequenceFile.Reader[] readers = org.apache.hadoop.mapred.SequenceFileOutputFormat
+ .getReaders(getConf(), tmpFolder);
+
+ TreeMap<String, LongWritable> stats = new TreeMap<String, LongWritable>();
+ for (int i = 0; i < readers.length; i++) {
+ SequenceFile.Reader reader = readers[i];
+ while (reader.next(key, value)) {
+ String k = key.toString();
+ LongWritable val = stats.get(k);
+ if (val == null) {
+ val = new LongWritable();
+ if (k.equals("scx"))
+ val.set(Long.MIN_VALUE);
+ if (k.equals("scn"))
+ val.set(Long.MAX_VALUE);
+ stats.put(k, val);
+ }
+ if (k.equals("scx")) {
+ if (val.get() < value.get())
+ val.set(value.get());
+ } else if (k.equals("scn")) {
+ if (val.get() > value.get())
+ val.set(value.get());
+ } else {
+ val.set(val.get() + value.get());
+ }
+ }
+ reader.close();
+ }
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Statistics for WebTable: ");
+ LongWritable totalCnt = stats.get("T");
+ if (totalCnt==null)totalCnt=new LongWritable(0);
+ stats.remove("T");
+ LOG.info("TOTAL urls:\t" + totalCnt.get());
+ for (Map.Entry<String, LongWritable> entry : stats.entrySet()) {
+ String k = entry.getKey();
+ LongWritable val = entry.getValue();
+ if (k.equals("scn")) {
+ LOG.info("min score:\t" + (float) (val.get() / 1000.0f));
+ } else if (k.equals("scx")) {
+ LOG.info("max score:\t" + (float) (val.get() / 1000.0f));
+ } else if (k.equals("sct")) {
+ LOG.info("avg score:\t"
+ + (float) ((((double) val.get()) / totalCnt.get()) / 1000.0));
+ } else if (k.startsWith("status")) {
+ String[] st = k.split(" ");
+ int code = Integer.parseInt(st[1]);
+ if (st.length > 2)
+ LOG.info(" " + st[2] + " :\t" + val);
+ else
+ LOG.info(st[0] + " " + code + " ("
+ + CrawlStatus.getName((byte) code) + "):\t" + val);
+ } else
+ LOG.info(k + ":\t" + val);
+ }
+ }
+ // removing the tmp folder
+ fileSystem.delete(tmpFolder, true);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("WebTable statistics: done");
+ }
+ return null;
+ }
+
+ @Override
+ public Map<String, Object> finish() throws Exception {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
}
Modified: nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherJob.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherJob.java?rev=1028235&r1=1028234&r2=1028235&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherJob.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherJob.java Thu Oct 28 09:23:55 2010
@@ -4,6 +4,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
+import java.util.Map;
import java.util.Random;
import java.util.StringTokenizer;
@@ -24,6 +25,7 @@ import org.apache.nutch.storage.StorageU
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.NutchTool;
import org.apache.nutch.util.TableUtil;
import org.apache.gora.mapreduce.GoraMapper;
@@ -31,7 +33,7 @@ import org.apache.gora.mapreduce.GoraMap
* Multi-threaded fetcher.
*
*/
-public class FetcherJob implements Tool {
+public class FetcherJob implements Tool, NutchTool {
public static final String PROTOCOL_REDIR = "protocol";
@@ -138,25 +140,26 @@ public class FetcherJob implements Tool
return fields;
}
- /**
- * Run fetcher.
- * @param batchId batchId (obtained from Generator) or null to fetch all generated fetchlists
- * @param threads number of threads per map task
- * @param shouldResume
- * @param parse if true, then parse content immediately, if false then a separate
- * run of {@link ParserJob} will be needed.
- * @param numTasks number of fetching tasks (reducers). If set to < 1 then use the default,
- * which is mapred.map.tasks.
- * @return 0 on success
- * @throws Exception
- */
- public int fetch(String batchId, int threads,
- boolean shouldResume, boolean parse, int numTasks)
- throws Exception {
- LOG.info("FetcherJob: starting");
-
+ public Map<String,Object> prepare() throws Exception {
+ return null;
+ }
+
+ public Map<String,Object> postJob(int jobIndex, Job job) throws Exception {
+ return null;
+ }
+
+ public Map<String,Object> finish() throws Exception {
+ return null;
+ }
+
+ public Job[] createJobs(Object... args) throws Exception {
checkConfiguration();
-
+ String batchId = (String)args[0];
+ int threads = (Integer)args[1];
+ boolean shouldResume = (Boolean)args[2];
+ boolean parse = (Boolean)args[3];
+ int numTasks = (Integer)args[4];
+
if (threads > 0) {
getConf().setInt(THREADS_KEY, threads);
}
@@ -173,16 +176,6 @@ public class FetcherJob implements Tool
getConf().setLong("fetcher.timelimit", timelimit);
}
- LOG.info("FetcherJob : timelimit set for : " + timelimit);
- LOG.info("FetcherJob: threads: " + getConf().getInt(THREADS_KEY, 10));
- LOG.info("FetcherJob: parsing: " + getConf().getBoolean(PARSE_KEY, true));
- LOG.info("FetcherJob: resuming: " + getConf().getBoolean(RESUME_KEY, false));
- if (batchId.equals(Nutch.ALL_BATCH_ID_STR)) {
- LOG.info("FetcherJob: fetching all");
- } else {
- LOG.info("FetcherJob: batchId: " + batchId);
- }
-
Job job = new NutchJob(getConf(), "fetch");
Collection<WebPage.Field> fields = getFields(job);
StorageUtils.initMapperJob(job, fields, IntWritable.class,
@@ -194,8 +187,37 @@ public class FetcherJob implements Tool
} else {
job.setNumReduceTasks(numTasks);
}
+ return new Job[]{job};
+ }
+
+ /**
+ * Run fetcher.
+ * @param batchId batchId (obtained from Generator) or null to fetch all generated fetchlists
+ * @param threads number of threads per map task
+ * @param shouldResume
+ * @param parse if true, then parse content immediately, if false then a separate
+ * run of {@link ParserJob} will be needed.
+ * @param numTasks number of fetching tasks (reducers). If set to < 1 then use the default,
+ * which is mapred.map.tasks.
+ * @return 0 on success
+ * @throws Exception
+ */
+ public int fetch(String batchId, int threads, boolean shouldResume, boolean parse, int numTasks)
+ throws Exception {
+ LOG.info("FetcherJob: starting");
+
+ LOG.info("FetcherJob : timelimit set for : " + getConf().getLong("fetcher.timelimit", -1));
+ LOG.info("FetcherJob: threads: " + getConf().getInt(THREADS_KEY, 10));
+ LOG.info("FetcherJob: parsing: " + getConf().getBoolean(PARSE_KEY, true));
+ LOG.info("FetcherJob: resuming: " + getConf().getBoolean(RESUME_KEY, false));
+ if (batchId.equals(Nutch.ALL_BATCH_ID_STR)) {
+ LOG.info("FetcherJob: fetching all");
+ } else {
+ LOG.info("FetcherJob: batchId: " + batchId);
+ }
- boolean success = job.waitForCompletion(true);
+ Job[] jobs = createJobs(batchId, threads, shouldResume, parse, numTasks);
+ boolean success = jobs[0].waitForCompletion(true);
if (!success) {
LOG.info("FetcherJob: failed");
return -1;
Modified: nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java?rev=1028235&r1=1028234&r2=1028235&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java Thu Oct 28 09:23:55 2010
@@ -16,6 +16,7 @@
*/
package org.apache.nutch.indexer.solr;
+import java.util.Map;
import java.util.Random;
import org.slf4j.Logger;
@@ -30,16 +31,29 @@ import org.apache.nutch.indexer.IndexerJ
import org.apache.nutch.indexer.NutchIndexWriterFactory;
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.util.NutchConfiguration;
+import org.apache.nutch.util.NutchTool;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
-public class SolrIndexerJob extends IndexerJob {
+public class SolrIndexerJob extends IndexerJob implements NutchTool {
public static Logger LOG = LoggerFactory.getLogger(SolrIndexerJob.class);
- private void indexSolr(String solrUrl, String batchId) throws Exception {
- LOG.info("SolrIndexerJob: starting");
-
+ public Map<String,Object> prepare() throws Exception {
+ return null;
+ }
+
+ public Map<String,Object> postJob(int jobIndex, Job job) throws Exception {
+ return null;
+ }
+
+ public Map<String,Object> finish() throws Exception {
+ return null;
+ }
+
+ public Job[] createJobs(Object... args) throws Exception {
+ String solrUrl = (String)args[0];
+ String batchId = (String)args[1];
NutchIndexWriterFactory.addClassToConf(getConf(), SolrWriter.class);
getConf().set(SolrConstants.SERVER_URL, solrUrl);
@@ -48,21 +62,28 @@ public class SolrIndexerJob extends Inde
+ new Random().nextInt());
FileOutputFormat.setOutputPath(job, tmp);
+ return new Job[]{job};
+ }
+
+ private void indexSolr(String solrUrl, String crawlId) throws Exception {
+ LOG.info("SolrIndexerJob: starting");
+
+ Job[] jobs = createJobs(solrUrl, crawlId);
boolean success = false;
try {
- success = job.waitForCompletion(true);
+ success = jobs[0].waitForCompletion(true);
// do the commits once and for all the reducers in one go
SolrServer solr = new CommonsHttpSolrServer(solrUrl);
solr.commit();
} finally {
- FileSystem.get(getConf()).delete(tmp, true);
+ FileSystem.get(getConf()).delete(FileOutputFormat.getOutputPath(jobs[0]), true);
}
LOG.info("SolrIndexerJob: " + (success ? "done" : "failed"));
}
public int run(String[] args) throws Exception {
if (args.length < 2) {
- System.err.println("Usage: SolrIndexerJob <solr url> (<batch id> | -all | -reindex) [-crawlId <id>]");
+ System.err.println("Usage: SolrIndexerJob <solr url> (<batchId> | -all | -reindex) [-crawlId <id>]");
return -1;
}
Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParserJob.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParserJob.java?rev=1028235&r1=1028234&r2=1028235&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/parse/ParserJob.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/parse/ParserJob.java Thu Oct 28 09:23:55 2010
@@ -3,6 +3,7 @@ package org.apache.nutch.parse;
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
+import java.util.Map;
import org.apache.avro.util.Utf8;
import org.slf4j.Logger;
@@ -22,11 +23,12 @@ import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.IdentityPageReducer;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.NutchTool;
import org.apache.nutch.util.TableUtil;
import org.apache.gora.mapreduce.GoraMapper;
public class ParserJob extends GoraMapper<String, WebPage, String, WebPage>
- implements Tool {
+ implements Tool, NutchTool {
public static final Logger LOG = LoggerFactory.getLogger(ParserJob.class);
@@ -144,14 +146,43 @@ public class ParserJob extends GoraMappe
this.conf = conf;
}
- public int parse(String batchId, boolean shouldResume, boolean force) throws Exception {
- LOG.info("ParserJob: starting");
-
+ public Map<String,Object> prepare() throws Exception {
+ return null;
+ }
+
+ public Map<String,Object> postJob(int jobIndex, Job job) throws Exception {
+ return null;
+ }
+
+ public Map<String,Object> finish() throws Exception {
+ return null;
+ }
+
+ public Job[] createJobs(Object... args) throws Exception {
+ String batchId = (String)args[0];
+ boolean shouldResume = (Boolean)args[1];
+ boolean force = (Boolean)args[2];
+
if (batchId != null) {
getConf().set(GeneratorJob.BATCH_ID, batchId);
}
getConf().setBoolean(RESUME_KEY, shouldResume);
getConf().setBoolean(FORCE_KEY, force);
+ final Job job = new NutchJob(getConf(), "parse");
+
+ Collection<WebPage.Field> fields = getFields(job);
+ StorageUtils.initMapperJob(job, fields, String.class, WebPage.class,
+ ParserJob.class);
+ StorageUtils.initReducerJob(job, IdentityPageReducer.class);
+ job.setNumReduceTasks(0);
+
+ return new Job[]{job};
+ }
+
+ public int parse(String crawlId, boolean shouldResume, boolean force) throws Exception {
+ Job[] jobs = createJobs(crawlId, shouldResume, force);
+
+ LOG.info("ParserJob: starting");
LOG.info("ParserJob: resuming:\t" + getConf().getBoolean(RESUME_KEY, false));
LOG.info("ParserJob: forced reparse:\t" + getConf().getBoolean(FORCE_KEY, false));
@@ -160,15 +191,7 @@ public class ParserJob extends GoraMappe
} else {
LOG.info("ParserJob: batchId:\t" + batchId);
}
-
- final Job job = new NutchJob(getConf(), "parse");
-
- Collection<WebPage.Field> fields = getFields(job);
- StorageUtils.initMapperJob(job, fields, String.class, WebPage.class,
- ParserJob.class);
- StorageUtils.initReducerJob(job, IdentityPageReducer.class);
- job.setNumReduceTasks(0);
- boolean success = job.waitForCompletion(true);
+ boolean success = jobs[0].waitForCompletion(true);
if (!success){
LOG.info("ParserJob: failed");
return -1;
Modified: nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java?rev=1028235&r1=1028234&r2=1028235&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java Thu Oct 28 09:23:55 2010
@@ -100,7 +100,7 @@ public class LogUtil {
if (!hasNewline())
return;
try {
- method.invoke(logger, new String[] { toString().trim() });
+ method.invoke(logger, new Object[] { toString().trim() });
} catch (Exception e) {
if (LOG.isErrorEnabled()) {
LOG.error("Cannot log with method [" + method + "]", e);
Added: nutch/trunk/src/java/org/apache/nutch/util/NutchTool.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/NutchTool.java?rev=1028235&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/NutchTool.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/util/NutchTool.java Thu Oct 28 09:23:55 2010
@@ -0,0 +1,20 @@
package org.apache.nutch.util;

import java.util.Map;

import org.apache.hadoop.mapreduce.Job;

/**
 * A tool that can be decomposed into a sequence of map-reduce {@link Job}s,
 * so that a driver (e.g. the REST API job manager) can execute the jobs one
 * at a time and report progress and results between phases.
 *
 * <p>Expected call order: {@link #prepare()}, then {@link #createJobs(Object...)},
 * then {@link #postJob(int, Job)} after each completed job, and finally
 * {@link #finish()}.
 */
public interface NutchTool {

  /** Prepares the tool. May return additional info, or null. */
  Map<String, Object> prepare() throws Exception;

  /**
   * Creates the jobs to be executed in sequence.
   *
   * @param args tool-specific positional arguments
   */
  Job[] createJobs(Object... args) throws Exception;

  /**
   * Post-processes the results of a single completed job.
   *
   * @param jobIndex index of the job within the array from {@link #createJobs(Object...)}
   * @param job the completed job
   */
  Map<String, Object> postJob(int jobIndex, Job job) throws Exception;

  /** Finishes processing and optionally returns results. */
  Map<String, Object> finish() throws Exception;
}
\ No newline at end of file
Propchange: nutch/trunk/src/java/org/apache/nutch/util/NutchTool.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/test/org/apache/nutch/api/TestAPI.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/api/TestAPI.java?rev=1028235&view=auto
==============================================================================
--- nutch/trunk/src/test/org/apache/nutch/api/TestAPI.java (added)
+++ nutch/trunk/src/test/org/apache/nutch/api/TestAPI.java Thu Oct 28 09:23:55 2010
@@ -0,0 +1,110 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.api;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.nutch.api.JobManager.JobType;
+import org.restlet.ext.jackson.JacksonRepresentation;
+import org.restlet.representation.Representation;
+import org.restlet.resource.ClientResource;
+
+import junit.framework.TestCase;
+
+public class TestAPI extends TestCase {
+
+  NutchServer server;
+  ClientResource cli;
+
+  String baseUrl = "http://localhost:8192/nutch/";
+
+  public void setUp() throws Exception {
+    server = new NutchServer(8192);
+    server.start();
+  }
+
+  public void tearDown() throws Exception {
+    if (!server.stop(false)) {
+      for (int i = 1; i < 11; i++) {
+        System.err.println("Waiting for jobs to complete - " + i + "s");
+        try {
+          Thread.sleep(1000);
+        } catch (Exception ignored) {}
+        server.stop(false);
+        if (!server.isRunning()) {
+          break;
+        }
+      }
+    }
+    if (server.isRunning()) {
+      System.err.println("Forcibly stopping server...");
+      server.stop(true);
+    }
+  }
+
+  public void testInfoAPI() throws Exception {
+    ClientResource cli = new ClientResource(baseUrl);
+    String expected = "[[\"confs\",\"Configuration manager\"],[\"jobs\",\"Job manager\"]]";
+    String got = cli.get().getText();
+    assertEquals(expected, got);
+  }
+
+  public void testConfsAPI() throws Exception {
+    ClientResource cli = new ClientResource(baseUrl + ConfResource.PATH);
+    assertEquals("[\"default\"]", cli.get().getText());
+    // create a new config named "test" with a single property
+    Map<String,Object> map = new HashMap<String,Object>();
+    map.put(Params.CONF_ID, "test");
+    HashMap<String,String> props = new HashMap<String,String>();
+    props.put("testProp", "blurfl");
+    map.put(Params.PROPS, props);
+    JacksonRepresentation<Map<String,Object>> jr =
+      new JacksonRepresentation<Map<String,Object>>(map);
+    System.out.println(cli.put(jr).getText());
+    assertEquals("[\"default\",\"test\"]", cli.get().getText());
+    cli = new ClientResource(baseUrl + ConfResource.PATH + "/test");
+    Map res = cli.get(Map.class);
+    assertEquals("blurfl", res.get("testProp"));
+    // delete the config and verify only the default remains
+    cli.delete();
+    cli = new ClientResource(baseUrl + ConfResource.PATH);
+    assertEquals("[\"default\"]", cli.get().getText());
+  }
+
+  public void testJobsAPI() throws Exception {
+    ClientResource cli = new ClientResource(baseUrl + JobResource.PATH);
+    assertEquals("[]", cli.get().getText());
+    // create a READDB job, sending the request body as JSON
+    Map<String,Object> map = new HashMap<String,Object>();
+    map.put(Params.JOB_TYPE, JobType.READDB.toString());
+    map.put(Params.CONF_ID, "default");
+    JacksonRepresentation<Map<String,Object>> jr =
+      new JacksonRepresentation<Map<String,Object>>(map);
+    Representation r = cli.put(jr);
+    String jobId = r.getText();
+    assertNotNull(jobId);
+    assertTrue(jobId.startsWith("default-READDB-"));
+    // list jobs and verify the new job is present and running
+    Map[] list = cli.get(Map[].class);
+    assertEquals(1, list.length);
+    String id = (String)list[0].get("id");
+    String state = (String)list[0].get("state");
+    assertEquals(jobId, id);
+    assertEquals("RUNNING", state);
+  }
+}
Propchange: nutch/trunk/src/test/org/apache/nutch/api/TestAPI.java
------------------------------------------------------------------------------
svn:eol-style = native