You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by th...@apache.org on 2016/07/16 19:48:48 UTC

[32/51] [partial] nutch git commit: NUTCH-2292 : Mavenize the build for nutch-core and nutch-plugins

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/util/domain/DomainStatistics.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/util/domain/DomainStatistics.java b/nutch-core/src/main/java/org/apache/nutch/util/domain/DomainStatistics.java
new file mode 100644
index 0000000..6c1bd9e
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/util/domain/DomainStatistics.java
@@ -0,0 +1,234 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.util.domain;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.text.SimpleDateFormat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.util.NutchConfiguration;
+import org.apache.nutch.util.TimingUtil;
+import org.apache.nutch.util.URLUtil;
+
+/**
+ * Extracts some very basic statistics about domains from the crawldb.
+ * <p>
+ * A MapReduce job reads the <code>current</code> sub-directory of every given
+ * crawldb. For each entry whose status is "fetched" or "not modified" it emits
+ * a count keyed by host, domain, suffix or top level domain, depending on the
+ * selected mode. Overall totals are emitted under the keys
+ * <code>FETCHED</code> and <code>NOT_FETCHED</code> and mirrored in the job
+ * counters of {@link MyCounter}.
+ */
+public class DomainStatistics extends Configured implements Tool {
+
+  private static final Logger LOG = LoggerFactory
+      .getLogger(DomainStatistics.class);
+
+  private static final Text FETCHED_TEXT = new Text("FETCHED");
+  private static final Text NOT_FETCHED_TEXT = new Text("NOT_FETCHED");
+
+  /** Job counters maintained by the mapper. */
+  public static enum MyCounter {
+    FETCHED, NOT_FETCHED, EMPTY_RESULT
+  };
+
+  private static final int MODE_HOST = 1;
+  private static final int MODE_DOMAIN = 2;
+  private static final int MODE_SUFFIX = 3;
+  private static final int MODE_TLD = 4;
+
+  /** Prints the command line synopsis to standard error. */
+  private static void printUsage() {
+    System.err.println("Usage: DomainStatistics inputDirs outDir mode [numOfReducers]");
+
+    System.err.println("\tinputDirs\tComma separated list of crawldb input directories");
+    System.err.println("\t\t\tE.g.: crawl/crawldb/");
+
+    System.err.println("\toutDir\t\tOutput directory where results should be dumped");
+
+    System.err.println("\tmode\t\tSet statistics gathering mode");
+    System.err.println("\t\t\t\thost\tGather statistics by host");
+    System.err.println("\t\t\t\tdomain\tGather statistics by domain");
+    System.err.println("\t\t\t\tsuffix\tGather statistics by suffix");
+    System.err.println("\t\t\t\ttld\tGather statistics by top level domain");
+
+    System.err.println("\t[numOfReducers]\tOptional number of reduce jobs to use. Defaults to 1.");
+  }
+
+  /**
+   * Configures and runs the statistics job.
+   * 
+   * @param args
+   *          <code>inputDirs outDir mode [numOfReducers]</code>
+   * @return 0 if the job completed successfully, 1 if the arguments were
+   *         invalid or the job failed
+   * @throws Exception
+   *           if the job cannot be set up or submitted
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+    if (args.length < 3) {
+      printUsage();
+      return 1;
+    }
+    String inputDir = args[0];
+    String outputDir = args[1];
+    String modeName = args[2];
+    int numOfReducers = 1;
+
+    if (args.length > 3) {
+      numOfReducers = Integer.parseInt(args[3]);
+    }
+
+    int mode;
+    String jobName;
+    if ("host".equals(modeName)) {
+      jobName = "Host statistics";
+      mode = MODE_HOST;
+    } else if ("domain".equals(modeName)) {
+      jobName = "Domain statistics";
+      mode = MODE_DOMAIN;
+    } else if ("suffix".equals(modeName)) {
+      jobName = "Suffix statistics";
+      mode = MODE_SUFFIX;
+    } else if ("tld".equals(modeName)) {
+      jobName = "TLD statistics";
+      mode = MODE_TLD;
+    } else {
+      // An unknown mode would make the mapper fail on every single record, so
+      // reject it up front instead of running a job with useless output.
+      System.err.println("Unknown mode: " + modeName);
+      printUsage();
+      return 1;
+    }
+
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("DomainStatistics: starting at " + sdf.format(start));
+
+    Configuration conf = getConf();
+    conf.setInt("domain.statistics.mode", mode);
+    conf.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
+
+    Job job = Job.getInstance(conf, jobName);
+    job.setJarByClass(DomainStatistics.class);
+
+    String[] inputDirsSpecs = inputDir.split(",");
+    for (int i = 0; i < inputDirsSpecs.length; i++) {
+      // Resolve with Hadoop's Path rather than java.io.File: File collapses
+      // the "//" of fully qualified URIs such as hdfs://namenode/crawldb.
+      Path completeInputPath = new Path(new Path(inputDirsSpecs[i]), "current");
+      FileInputFormat.addInputPath(job, completeInputPath);
+    }
+
+    job.setInputFormatClass(SequenceFileInputFormat.class);
+    FileOutputFormat.setOutputPath(job, new Path(outputDir));
+    job.setOutputFormatClass(TextOutputFormat.class);
+
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(LongWritable.class);
+    job.setOutputKeyClass(Text.class);
+    job.setOutputValueClass(LongWritable.class);
+
+    job.setMapperClass(DomainStatisticsMapper.class);
+    job.setReducerClass(DomainStatisticsReducer.class);
+    job.setCombinerClass(DomainStatisticsCombiner.class);
+    job.setNumReduceTasks(numOfReducers);
+
+    boolean success = job.waitForCompletion(true);
+
+    long end = System.currentTimeMillis();
+    if (!success) {
+      LOG.error("DomainStatistics: job failed at " + sdf.format(end)
+          + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+      return 1;
+    }
+    LOG.info("DomainStatistics: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
+    return 0;
+  }
+
+  /**
+   * Emits <code>(key, 1)</code> for every fetched crawldb entry, where the key
+   * is derived from the entry's URL according to the configured mode, plus one
+   * <code>FETCHED</code> or <code>NOT_FETCHED</code> record per entry.
+   */
+  static class DomainStatisticsMapper extends
+      Mapper<Text, CrawlDatum, Text, LongWritable> {
+    private static final LongWritable ONE = new LongWritable(1);
+
+    int mode = 0;
+
+    /** Reads the statistics mode from the job configuration. */
+    @Override
+    public void setup(Context context) {
+      mode = context.getConfiguration().getInt("domain.statistics.mode",
+          MODE_DOMAIN);
+    }
+
+    @Override
+    public void map(Text urlText, CrawlDatum datum, Context context)
+        throws IOException, InterruptedException {
+
+      if (datum.getStatus() != CrawlDatum.STATUS_DB_FETCHED
+          && datum.getStatus() != CrawlDatum.STATUS_DB_NOTMODIFIED) {
+        context.getCounter(MyCounter.NOT_FETCHED).increment(1);
+        context.write(NOT_FETCHED_TEXT, ONE);
+        return;
+      }
+
+      String out = null;
+      try {
+        URL url = new URL(urlText.toString());
+        switch (mode) {
+        case MODE_HOST:
+          out = url.getHost();
+          break;
+        case MODE_DOMAIN:
+          out = URLUtil.getDomainName(url);
+          break;
+        case MODE_SUFFIX:
+          out = URLUtil.getDomainSuffix(url).getDomain();
+          break;
+        case MODE_TLD:
+          out = URLUtil.getTopLevelDomainName(url);
+          break;
+        }
+      } catch (Exception ex) {
+        // Best effort: a URL that cannot be analysed is skipped for the
+        // per-key statistics but still counted as fetched below. Only the key
+        // extraction is guarded, so failures of context.write() propagate.
+        LOG.warn("Unable to extract statistics key from url " + urlText + ": "
+            + ex);
+      }
+
+      if (out != null) {
+        if (out.trim().equals("")) {
+          LOG.info("url : " + urlText);
+          context.getCounter(MyCounter.EMPTY_RESULT).increment(1);
+        }
+        context.write(new Text(out), ONE);
+      }
+
+      context.getCounter(MyCounter.FETCHED).increment(1);
+      context.write(FETCHED_TEXT, ONE);
+    }
+  }
+
+  /**
+   * Sums the counts of a key and writes the result as
+   * <code>(total, key)</code>.
+   * <p>
+   * NOTE(review): the output types here are (LongWritable, Text) while the job
+   * declares Text / LongWritable as output key / value classes. Kept as is
+   * because changing it would alter the column order of the output files.
+   */
+  static class DomainStatisticsReducer extends
+      Reducer<Text, LongWritable, LongWritable, Text> {
+    @Override
+    public void reduce(Text key, Iterable<LongWritable> values, Context context)
+        throws IOException, InterruptedException {
+      long total = 0;
+
+      for (LongWritable val : values) {
+        total += val.get();
+      }
+
+      context.write(new LongWritable(total), key);
+    }
+  }
+
+  /**
+   * Pre-aggregates the counts of a key and writes them as
+   * <code>(key, total)</code>.
+   */
+  public static class DomainStatisticsCombiner extends
+      Reducer<Text, LongWritable, Text, LongWritable> {
+    @Override
+    public void reduce(Text key, Iterable<LongWritable> values, Context context)
+        throws IOException, InterruptedException {
+      long total = 0;
+
+      for (LongWritable val : values) {
+        total += val.get();
+      }
+      context.write(key, new LongWritable(total));
+    }
+  }
+
+  /** Command line entry point; exits with the status returned by the tool. */
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(NutchConfiguration.create(),
+        new DomainStatistics(), args);
+    System.exit(res);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/util/domain/DomainSuffix.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/util/domain/DomainSuffix.java b/nutch-core/src/main/java/org/apache/nutch/util/domain/DomainSuffix.java
new file mode 100644
index 0000000..d40ebe9
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/util/domain/DomainSuffix.java
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.util.domain;
+
+/**
+ * This class represents the last part of the host name, which is operated by
+ * authorities, not individuals. This information is needed to find the domain
+ * name of a host. The domain name of a host is defined to be the last part
+ * before the domain suffix, w/o subdomain names. As an example the domain name
+ * of <br>
+ * <code> http://lucene.apache.org/ 
+ * </code><br>
+ * is <code> apache.org</code> <br>
+ * This class holds three fields: the <strong>domain</strong> field represents
+ * the suffix (such as "co.uk"), <strong>boost</strong> is a float for boosting
+ * the score of URLs with this suffix, and the <strong>status</strong> field
+ * represents the domain's status. Instances are immutable. For more
+ * information please see conf/domain-suffixes.xml.
+ * 
+ * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
+ * @see TopLevelDomain
+ */
+public class DomainSuffix {
+
+  /**
+   * Enumeration of the status of the tld. Please see domain-suffixes.xml.
+   */
+  public enum Status {
+    INFRASTRUCTURE, SPONSORED, UNSPONSORED, STARTUP, PROPOSED, DELETED, PSEUDO_DOMAIN, DEPRECATED, IN_USE, NOT_IN_USE, REJECTED
+  }
+
+  /** Boost used when none is given explicitly. */
+  public static final float DEFAULT_BOOST = 1.0f;
+  /** Status used when none is given explicitly. */
+  public static final Status DEFAULT_STATUS = Status.IN_USE;
+
+  private final String domain;
+  private final Status status;
+  private final float boost;
+
+  /**
+   * Creates a suffix with an explicit status and boost.
+   * 
+   * @param domain
+   *          the suffix, e.g. "co.uk"
+   * @param status
+   *          the status of the suffix
+   * @param boost
+   *          the score boost for URLs with this suffix
+   */
+  public DomainSuffix(String domain, Status status, float boost) {
+    this.domain = domain;
+    this.status = status;
+    this.boost = boost;
+  }
+
+  /**
+   * Creates a suffix with {@link #DEFAULT_STATUS} and {@link #DEFAULT_BOOST}.
+   * 
+   * @param domain
+   *          the suffix, e.g. "co.uk"
+   */
+  public DomainSuffix(String domain) {
+    this(domain, DEFAULT_STATUS, DEFAULT_BOOST);
+  }
+
+  /** @return the suffix this object was created with */
+  public String getDomain() {
+    return domain;
+  }
+
+  /** @return the status of the suffix */
+  public Status getStatus() {
+    return status;
+  }
+
+  /** @return the boost of the suffix */
+  public float getBoost() {
+    return boost;
+  }
+
+  /** @return the suffix, identical to {@link #getDomain()} */
+  @Override
+  public String toString() {
+    return domain;
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/util/domain/DomainSuffixes.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/util/domain/DomainSuffixes.java b/nutch-core/src/main/java/org/apache/nutch/util/domain/DomainSuffixes.java
new file mode 100644
index 0000000..765457e
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/util/domain/DomainSuffixes.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.util.domain;
+
+import java.io.InputStream;
+import java.util.HashMap;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Storage class for <code>DomainSuffix</code> objects, keyed by their suffix
+ * string. Note: this class is a singleton; its content is loaded once from the
+ * classpath resource <code>domain-suffixes.xml</code>.
+ * 
+ * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
+ */
+public class DomainSuffixes {
+  private static final Logger LOG = LoggerFactory
+      .getLogger(DomainSuffixes.class);
+
+  private final HashMap<String, DomainSuffix> domains = new HashMap<String, DomainSuffix>();
+
+  private static DomainSuffixes instance;
+
+  /**
+   * Private constructor: loads the suffix definitions from the classpath. Any
+   * failure is logged as a warning and leaves the instance with whatever
+   * entries were read up to that point.
+   */
+  private DomainSuffixes() {
+    String file = "domain-suffixes.xml";
+    InputStream input = this.getClass().getClassLoader()
+        .getResourceAsStream(file);
+    if (input == null) {
+      // Previously the null stream was handed to the XML parser, which only
+      // produced an unspecific parser error in the log.
+      LOG.warn("Domain suffix definitions not found on classpath: " + file);
+      return;
+    }
+    try {
+      new DomainSuffixesReader().read(this, input);
+    } catch (Exception ex) {
+      LOG.warn(StringUtils.stringifyException(ex));
+    } finally {
+      try {
+        input.close();
+      } catch (java.io.IOException ignored) {
+        // nothing sensible can be done if closing a read-only resource fails
+      }
+    }
+  }
+
+  /**
+   * Singleton instance, lazily instantiated on first use.
+   * <p>
+   * NOTE(review): the lazy initialisation is not synchronized; confirm whether
+   * callers may invoke this concurrently.
+   * 
+   * @return returns the domain suffix instance
+   */
+  public static DomainSuffixes getInstance() {
+    if (instance == null) {
+      instance = new DomainSuffixes();
+    }
+    return instance;
+  }
+
+  /** Registers the given suffix under its domain string. */
+  void addDomainSuffix(DomainSuffix tld) {
+    domains.put(tld.getDomain(), tld);
+  }
+
+  /** return whether the extension is a registered domain entry */
+  public boolean isDomainSuffix(String extension) {
+    return domains.containsKey(extension);
+  }
+
+  /**
+   * Return the {@link DomainSuffix} object for the extension, if extension is a
+   * top level domain returned object will be an instance of
+   * {@link TopLevelDomain}
+   * 
+   * @param extension
+   *          of the domain
+   * @return the registered suffix, or <code>null</code> if the extension is
+   *         not registered
+   */
+  public DomainSuffix get(String extension) {
+    return domains.get(extension);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/util/domain/DomainSuffixesReader.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/util/domain/DomainSuffixesReader.java b/nutch-core/src/main/java/org/apache/nutch/util/domain/DomainSuffixesReader.java
new file mode 100644
index 0000000..a2a60e2
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/util/domain/DomainSuffixesReader.java
@@ -0,0 +1,164 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.util.domain;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.nutch.util.domain.DomainSuffix.Status;
+import org.apache.nutch.util.domain.TopLevelDomain.Type;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+/**
+ * For parsing xml files containing domain suffix definitions. Parsed xml files
+ * should validate against <code>domain-suffixes.xsd</code>
+ * 
+ * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
+ */
+class DomainSuffixesReader {
+
+  private static final Logger LOG = LoggerFactory
+      .getLogger(DomainSuffixesReader.class);
+
+  /**
+   * Parses the given stream and adds every top level domain and suffix found
+   * to <code>tldEntries</code>. The stream is not closed by this method.
+   * 
+   * @param tldEntries
+   *          the container receiving the parsed entries
+   * @param input
+   *          the xml document to read
+   * @throws IOException
+   *           if the document cannot be parsed or lacks a required section
+   */
+  void read(DomainSuffixes tldEntries, InputStream input) throws IOException {
+    try {
+
+      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+      factory.setIgnoringComments(true);
+      DocumentBuilder builder = factory.newDocumentBuilder();
+      Document document = builder.parse(new InputSource(input));
+
+      Element root = document.getDocumentElement();
+
+      if (root != null && root.getTagName().equals("domains")) {
+
+        Element tlds = requiredChild(root, "tlds");
+        Element suffixes = requiredChild(root, "suffixes");
+
+        // read tlds
+        readITLDs(tldEntries, requiredChild(tlds, "itlds"));
+        readGTLDs(tldEntries, requiredChild(tlds, "gtlds"));
+        readCCTLDs(tldEntries, requiredChild(tlds, "cctlds"));
+
+        readSuffixes(tldEntries, suffixes);
+      } else {
+        throw new IOException("xml file is not valid");
+      }
+    } catch (ParserConfigurationException ex) {
+      LOG.warn(StringUtils.stringifyException(ex));
+      // keep the original exception as cause so the stack trace is not lost
+      throw new IOException(ex.getMessage(), ex);
+    } catch (SAXException ex) {
+      LOG.warn(StringUtils.stringifyException(ex));
+      throw new IOException(ex.getMessage(), ex);
+    }
+  }
+
+  /**
+   * Returns the first descendant element of <code>parent</code> with the given
+   * tag name, failing with an IOException instead of a later
+   * NullPointerException when the section is missing.
+   */
+  private static Element requiredChild(Element parent, String tagName)
+      throws IOException {
+    Element child = (Element) parent.getElementsByTagName(tagName).item(0);
+    if (child == null) {
+      throw new IOException("xml file is not valid: missing <" + tagName
+          + "> element");
+    }
+    return child;
+  }
+
+  /** Adds every <code>tld</code> below <code>el</code> as infrastructure TLD. */
+  void readITLDs(DomainSuffixes tldEntries, Element el) {
+    NodeList children = el.getElementsByTagName("tld");
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readGTLD((Element) children.item(i),
+          Type.INFRASTRUCTURE));
+    }
+  }
+
+  /** Adds every <code>tld</code> below <code>el</code> as generic TLD. */
+  void readGTLDs(DomainSuffixes tldEntries, Element el) {
+    NodeList children = el.getElementsByTagName("tld");
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readGTLD((Element) children.item(i),
+          Type.GENERIC));
+    }
+  }
+
+  /** Adds every <code>tld</code> below <code>el</code> as country code TLD. */
+  void readCCTLDs(DomainSuffixes tldEntries, Element el) throws IOException {
+    NodeList children = el.getElementsByTagName("tld");
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readCCTLD((Element) children.item(i)));
+    }
+  }
+
+  /** Builds a TLD of the given type from a <code>tld</code> element. */
+  TopLevelDomain readGTLD(Element el, Type type) {
+    String domain = el.getAttribute("domain");
+    Status status = readStatus(el);
+    float boost = readBoost(el);
+    return new TopLevelDomain(domain, type, status, boost);
+  }
+
+  /** Builds a country code TLD from a <code>tld</code> element. */
+  TopLevelDomain readCCTLD(Element el) throws IOException {
+    String domain = el.getAttribute("domain");
+    Status status = readStatus(el);
+    float boost = readBoost(el);
+    String countryName = readCountryName(el);
+    return new TopLevelDomain(domain, status, boost, countryName);
+  }
+
+  /** read optional field status */
+  Status readStatus(Element el) {
+    NodeList list = el.getElementsByTagName("status");
+    if (list.getLength() == 0)
+      return DomainSuffix.DEFAULT_STATUS;
+    return Status.valueOf(list.item(0).getTextContent().trim());
+  }
+
+  /** read optional field boost */
+  float readBoost(Element el) {
+    NodeList list = el.getElementsByTagName("boost");
+    if (list.getLength() == 0)
+      return DomainSuffix.DEFAULT_BOOST;
+    return Float.parseFloat(list.item(0).getTextContent().trim());
+  }
+
+  /**
+   * read field countryname
+   */
+  String readCountryName(Element el) throws IOException {
+    NodeList list = el.getElementsByTagName("country");
+    if (list.getLength() == 0)
+      throw new IOException("Country name should be given");
+    // getNodeValue() of an element node is always null; the name is the text
+    // content of the <country> element.
+    return list.item(0).getTextContent().trim();
+  }
+
+  /** Adds every <code>suffix</code> element below <code>el</code>. */
+  void readSuffixes(DomainSuffixes tldEntries, Element el) {
+    NodeList children = el.getElementsByTagName("suffix");
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readSuffix((Element) children.item(i)));
+    }
+  }
+
+  /** Builds a plain domain suffix from a <code>suffix</code> element. */
+  DomainSuffix readSuffix(Element el) {
+    String domain = el.getAttribute("domain");
+    Status status = readStatus(el);
+    float boost = readBoost(el);
+    return new DomainSuffix(domain, status, boost);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/util/domain/TopLevelDomain.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/util/domain/TopLevelDomain.java b/nutch-core/src/main/java/org/apache/nutch/util/domain/TopLevelDomain.java
new file mode 100644
index 0000000..f442d1f
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/util/domain/TopLevelDomain.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.util.domain;
+
+/**
+ * (From wikipedia) A top-level domain (TLD) is the last part of an Internet
+ * domain name; that is, the letters which follow the final dot of any domain
+ * name. For example, in the domain name <code>www.website.com</code>, the
+ * top-level domain is <code>com</code>.
+ * 
+ * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
+ * 
+ * @see <a href="http://www.iana.org/"> iana.org</a>
+ * 
+ * @see <a href="http://en.wikipedia.org/wiki/Top-level_domain">
+ *      Top-level_domain</a>
+ */
+public class TopLevelDomain extends DomainSuffix {
+
+  /** Kind of a top-level domain. */
+  public enum Type {
+    INFRASTRUCTURE, GENERIC, COUNTRY
+  }
+
+  private final Type type;
+  private final String countryName;
+
+  /**
+   * Creates a top-level domain of the given type without a country name.
+   * 
+   * @param domain
+   *          the top-level domain, e.g. "com"
+   * @param type
+   *          the kind of top-level domain
+   * @param status
+   *          the status of the domain
+   * @param boost
+   *          the score boost for URLs in this domain
+   */
+  public TopLevelDomain(String domain, Type type, Status status, float boost) {
+    super(domain, status, boost);
+    this.type = type;
+    this.countryName = null;
+  }
+
+  /**
+   * Creates a top-level domain of type {@link Type#COUNTRY}.
+   * 
+   * @param domain
+   *          the top-level domain, e.g. "uk"
+   * @param status
+   *          the status of the domain
+   * @param boost
+   *          the score boost for URLs in this domain
+   * @param countryName
+   *          the name of the country the domain belongs to
+   */
+  public TopLevelDomain(String domain, Status status, float boost,
+      String countryName) {
+    super(domain, status, boost);
+    this.type = Type.COUNTRY;
+    this.countryName = countryName;
+  }
+
+  /** @return the kind of this top-level domain */
+  public Type getType() {
+    return type;
+  }
+
+  /**
+   * Returns the country name if TLD is Country Code TLD
+   * 
+   * @return country name or null
+   */
+  public String getCountryName() {
+    return countryName;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/util/domain/package.html
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/util/domain/package.html b/nutch-core/src/main/java/org/apache/nutch/util/domain/package.html
new file mode 100644
index 0000000..49e0e6a
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/util/domain/package.html
@@ -0,0 +1,14 @@
+<html>
+<body>
+<h2>Classes for domain name analysis.</h2>
+
+For more information, please refer to the following URLs:
+<ul>
+<li><a href="http://en.wikipedia.org/wiki/DNS">http://en.wikipedia.org/wiki/DNS</a></li>
+<li><a href="http://en.wikipedia.org/wiki/Top-level_domain">http://en.wikipedia.org/wiki/Top-level_domain</a></li>
+<li><a href="http://wiki.mozilla.org/TLD_List">http://wiki.mozilla.org/TLD_List</a></li>
+<li><a href="http://publicsuffix.org/">http://publicsuffix.org/</a></li>
+</ul>
+
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/util/package-info.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/util/package-info.java b/nutch-core/src/main/java/org/apache/nutch/util/package-info.java
new file mode 100644
index 0000000..053dbc1
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/util/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Miscellaneous utility classes.
+ */
+package org.apache.nutch.util;
+

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/NutchUiApplication.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/NutchUiApplication.java b/nutch-core/src/main/java/org/apache/nutch/webui/NutchUiApplication.java
new file mode 100644
index 0000000..6fd2396
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/NutchUiApplication.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webui;
+
+import org.apache.nutch.webui.pages.DashboardPage;
+import org.apache.nutch.webui.pages.assets.NutchUiCssReference;
+import org.apache.wicket.markup.html.WebPage;
+import org.apache.wicket.protocol.http.WebApplication;
+import org.apache.wicket.spring.injection.annot.SpringComponentInjector;
+import org.springframework.beans.BeansException;
+import org.springframework.context.ApplicationContext;
+import org.springframework.context.ApplicationContextAware;
+import org.springframework.stereotype.Component;
+
+import de.agilecoders.wicket.core.Bootstrap;
+import de.agilecoders.wicket.core.markup.html.themes.bootstrap.BootstrapCssReference;
+import de.agilecoders.wicket.core.settings.BootstrapSettings;
+import de.agilecoders.wicket.core.settings.SingleThemeProvider;
+import de.agilecoders.wicket.core.settings.Theme;
+import de.agilecoders.wicket.extensions.markup.html.bootstrap.icon.FontAwesomeCssReference;
+
+@Component
+public class NutchUiApplication extends WebApplication implements
+    ApplicationContextAware {
+  private static final String THEME_NAME = "bootstrap";
+  private ApplicationContext context;
+
+  /**
+   * Stores the application context handed in by Spring; it is later passed to
+   * the component injector created in {@link #init()}.
+   */
+  @Override
+  public void setApplicationContext(ApplicationContext applicationContext)
+      throws BeansException {
+    context = applicationContext;
+  }
+
+  /**
+   * The dashboard is the home page of the web UI.
+   * 
+   * @see org.apache.wicket.Application#getHomePage()
+   */
+  @Override
+  public Class<? extends WebPage> getHomePage() {
+    return DashboardPage.class;
+  }
+
+  /**
+   * Installs Bootstrap, selects the single UI theme and adds a
+   * {@link SpringComponentInjector} bound to the stored application context.
+   * 
+   * @see org.apache.wicket.Application#init()
+   */
+  @Override
+  public void init() {
+    super.init();
+
+    BootstrapSettings bootstrapSettings = new BootstrapSettings();
+    Bootstrap.install(this, bootstrapSettings);
+    // the theme is applied after installation, on the same settings object
+    bootstrapSettings.setThemeProvider(new SingleThemeProvider(createTheme()));
+
+    getComponentInstantiationListeners().add(
+        new SpringComponentInjector(this, context));
+  }
+
+  /** Builds the theme from the Bootstrap, Font Awesome and Nutch UI CSS. */
+  private static Theme createTheme() {
+    return new Theme(THEME_NAME, BootstrapCssReference.instance(),
+        FontAwesomeCssReference.instance(), NutchUiCssReference.instance());
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/NutchUiApplication.properties
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/NutchUiApplication.properties b/nutch-core/src/main/java/org/apache/nutch/webui/NutchUiApplication.properties
new file mode 100644
index 0000000..4c62939
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/NutchUiApplication.properties
@@ -0,0 +1,63 @@
+#############################################################################
+#Licensed to the Apache Software Foundation (ASF) under one or more
+#contributor license agreements.  See the NOTICE file distributed with
+#this work for additional information regarding copyright ownership.
+#The ASF licenses this file to You under the Apache License, Version 2.0
+#(the "License"); you may not use this file except in compliance with
+#the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+#############################################################################
+
+navbar.menu.dashboard = Dashboard
+navbar.menu.statistics = Statistics
+navbar.menu.instances = Instances
+navbar.menu.settings = Settings
+navbar.menu.crawls = Crawls
+navbar.menu.scheduling = Scheduling
+navbar.menu.search = Search
+navbar.menu.url = URLs upload
+navbar.menu.seedLists = Seed lists
+
+page.header.seedList = Seed list
+
+navbar.userMenu.settings = Settings
+navbar.userMenu.logout = Log out
+
+menu.settings=Settings
+menu.instances=Instances
+
+connected=Connected
+disconnected=Disconnected
+
+##ENUMS
+ConnectionStatus.CONNECTING=Connecting
+ConnectionStatus.CONNECTED=Connected
+ConnectionStatus.DISCONNECTED=Disconnected
+
+CrawlStatus.NEW=New
+CrawlStatus.ERROR=Error
+CrawlStatus.CRAWLING=Crawling
+CrawlStatus.FINISHED=Finished
+
+instances=Instances
+instances.header.name=Instance name
+instances.header.hostname=Hostname
+instances.header.status=Status
+instances.header.username=Username
+instances.label.name=Instance name
+instances.label.hostname=Hostname
+instances.label.port=Port
+instances.label.username=Username
+instances.label.password=Password
+instances.buttons.addInstance=Add instance
+
+settings=Settings
+settings.header.name = Name
+settings.header.value = Value
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/NutchUiServer.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/NutchUiServer.java b/nutch-core/src/main/java/org/apache/nutch/webui/NutchUiServer.java
new file mode 100644
index 0000000..d534b8f
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/NutchUiServer.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webui;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.wicket.protocol.http.WicketFilter;
+import org.apache.wicket.spring.SpringWebApplicationFactory;
+import org.mortbay.jetty.Handler;
+import org.mortbay.jetty.Server;
+import org.mortbay.jetty.servlet.Context;
+import org.mortbay.jetty.servlet.DefaultServlet;
+import org.mortbay.jetty.servlet.FilterHolder;
+import org.springframework.web.context.ContextLoaderListener;
+import org.springframework.web.context.WebApplicationContext;
+import org.springframework.web.context.request.RequestContextListener;
+import org.springframework.web.context.support.AnnotationConfigWebApplicationContext;
+
+/**
+ * Command line launcher that starts an embedded Jetty server hosting the
+ * Wicket based Nutch web UI.
+ * <p>
+ * Supported options: {@code -help} prints the usage, {@code -port <number>}
+ * selects the HTTP port (default 8080).
+ */
+public class NutchUiServer {
+  private static final String APP_FACTORY_NAME = SpringWebApplicationFactory.class
+      .getName();
+  private static final String CONFIG_LOCATION = "org.apache.nutch.webui";
+  private static final String CMD_PORT = "port";
+  private static Integer port = 8080;
+
+  /**
+   * Parses the command line and starts the server. On an unparsable command
+   * line or an invalid port the usage is printed and the method returns
+   * without starting anything.
+   *
+   * @param args
+   *          command line arguments
+   * @throws Exception
+   *           if the embedded server fails to start
+   */
+  public static void main(String[] args) throws Exception {
+    CommandLineParser parser = new GnuParser();
+    Options options = createWebAppOptions();
+    HelpFormatter formatter = new HelpFormatter();
+    CommandLine commandLine;
+    try {
+      commandLine = parser.parse(options, args);
+    } catch (Exception e) {
+      // Stop here: continuing would dereference a null CommandLine. The
+      // stringified exception is actually printed instead of being dropped.
+      formatter.printHelp("NutchUiServer", options, true);
+      System.err.println(StringUtils.stringifyException(e));
+      return;
+    }
+
+    if (commandLine.hasOption("help")) {
+      formatter.printHelp("NutchUiServer", options, true);
+      return;
+    }
+    // The port option takes an optional argument, so the value is null both
+    // when the option is absent and when it is given without a number; in
+    // either case the default port is kept.
+    String portValue = commandLine.getOptionValue(CMD_PORT);
+    if (portValue != null) {
+      try {
+        port = Integer.parseInt(portValue.trim());
+      } catch (NumberFormatException e) {
+        System.err.println("Invalid port number: " + portValue);
+        formatter.printHelp("NutchUiServer", options, true);
+        return;
+      }
+    }
+    startServer();
+  }
+
+  /**
+   * Configures Jetty with the Spring context listeners and the Wicket filter,
+   * starts it and blocks until the server thread terminates.
+   */
+  private static void startServer() throws Exception {
+    Server server = new Server(port);
+    Context context = new Context(server, "/", Context.SESSIONS);
+    context.addServlet(DefaultServlet.class, "/*");
+
+    context.addEventListener(new ContextLoaderListener(getContext()));
+    context.addEventListener(new RequestContextListener());
+
+    WicketFilter filter = new WicketFilter();
+    filter.setFilterPath("/");
+    FilterHolder holder = new FilterHolder(filter);
+    holder.setInitParameter("applicationFactoryClassName", APP_FACTORY_NAME);
+    context.addFilter(holder, "/*", Handler.DEFAULT);
+
+    server.setHandler(context);
+    server.start();
+    server.join();
+  }
+
+  /**
+   * Creates the annotation based Spring web application context whose config
+   * location is the web UI base package.
+   */
+  private static WebApplicationContext getContext() {
+    AnnotationConfigWebApplicationContext context = new AnnotationConfigWebApplicationContext();
+    context.setConfigLocation(CONFIG_LOCATION);
+    return context;
+  }
+
+  /**
+   * Declares the supported command line options ({@code help} and
+   * {@code port}).
+   */
+  private static Options createWebAppOptions() {
+    Options options = new Options();
+    Option helpOpt = new Option("h", "help", false, "show this help message");
+    // OptionBuilder accumulates its settings in static state until create().
+    OptionBuilder.withDescription("Port to run the WebApplication on.");
+    OptionBuilder.hasOptionalArg();
+    OptionBuilder.withArgName("port number");
+    options.addOption(OptionBuilder.create(CMD_PORT));
+    options.addOption(helpOpt);
+    return options;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/client/NutchClient.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/client/NutchClient.java b/nutch-core/src/main/java/org/apache/nutch/webui/client/NutchClient.java
new file mode 100644
index 0000000..3f8887d
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/client/NutchClient.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webui.client;
+
+import java.util.Map;
+
+import org.apache.nutch.webui.client.model.ConnectionStatus;
+import org.apache.nutch.webui.client.model.JobConfig;
+import org.apache.nutch.webui.client.model.JobInfo;
+import org.apache.nutch.webui.client.model.NutchStatus;
+import org.apache.nutch.webui.model.NutchInstance;
+import org.apache.nutch.webui.model.SeedList;
+
+/**
+ * Client side view of a single remote Nutch instance.
+ */
+public interface NutchClient {
+
+  /**
+   * @return the Nutch instance this client belongs to
+   */
+  NutchInstance getNutchInstance();
+
+  /**
+   * @return the status reported by the Nutch instance
+   */
+  NutchStatus getNutchStatus();
+
+  /**
+   * @return the connection status of the Nutch instance
+   */
+  ConnectionStatus getConnectionStatus();
+
+  /**
+   * Submits a job described by the given configuration.
+   *
+   * @param jobConfig
+   *          description of the job to run
+   * @return identifier of the submitted job
+   */
+  String executeJob(JobConfig jobConfig);
+
+  /**
+   * @param jobId
+   *          identifier of a previously submitted job
+   * @return information about that job
+   */
+  JobInfo getJobInfo(String jobId);
+
+  /**
+   * @param config
+   *          identifier of the configuration to read
+   * @return the configuration as key/value pairs
+   */
+  Map<String, String> getNutchConfig(String config);
+
+  /**
+   * Creates a seed list on the Nutch instance.
+   *
+   * @param seedList
+   *          seed list to create
+   * @return location of the seed directory
+   */
+  String createSeed(SeedList seedList);
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/client/NutchClientFactory.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/client/NutchClientFactory.java b/nutch-core/src/main/java/org/apache/nutch/webui/client/NutchClientFactory.java
new file mode 100644
index 0000000..32da00e
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/client/NutchClientFactory.java
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webui.client;
+
+import java.util.concurrent.ExecutionException;
+
+import org.apache.nutch.webui.client.impl.NutchClientImpl;
+import org.apache.nutch.webui.model.NutchInstance;
+import org.springframework.stereotype.Component;
+
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+
+/**
+ * Spring component that hands out {@link NutchClient}s. Clients are kept in
+ * a loading cache keyed by {@link NutchInstance}, so a client is created on
+ * first request for a key and reused afterwards.
+ */
+@Component
+public class NutchClientFactory {
+  private LoadingCache<NutchInstance, NutchClient> cache;
+
+  public NutchClientFactory() {
+    CacheLoader<NutchInstance, NutchClient> loader = new ClientLoader();
+    cache = CacheBuilder.newBuilder().build(loader);
+  }
+
+  /**
+   * Returns the client for the given instance, creating it if necessary.
+   *
+   * @param instance
+   *          Nutch instance the client should talk to
+   * @return cached or newly created client
+   * @throws IllegalStateException
+   *           if creating the client failed with a checked exception
+   */
+  public NutchClient getClient(NutchInstance instance) {
+    final NutchClient nutchClient;
+    try {
+      nutchClient = cache.get(instance);
+    } catch (ExecutionException e) {
+      throw new IllegalStateException(e);
+    }
+    return nutchClient;
+  }
+
+  /** Cache loader that builds a {@link NutchClientImpl} for a missing key. */
+  private static class ClientLoader extends
+      CacheLoader<NutchInstance, NutchClient> {
+    @Override
+    public NutchClient load(NutchInstance nutchInstance) throws Exception {
+      return new NutchClientImpl(nutchInstance);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/CrawlingCycle.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/CrawlingCycle.java b/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/CrawlingCycle.java
new file mode 100644
index 0000000..2482c06
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/CrawlingCycle.java
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webui.client.impl;
+
+import java.util.List;
+
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.nutch.webui.client.model.Crawl;
+import org.apache.nutch.webui.client.model.JobInfo;
+import org.apache.nutch.webui.client.model.JobInfo.State;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.Lists;
+
+/**
+ * This class implements crawl cycle as in crawl script
+ * 
+ * @author feodor
+ * 
+ */
+public class CrawlingCycle {
+  private Logger log = LoggerFactory.getLogger(CrawlingCycle.class);
+
+  private CrawlingCycleListener listener;
+  private RemoteCommandExecutor executor;
+  private Crawl crawl;
+
+  private List<RemoteCommand> remoteCommands;
+  private List<RemoteCommand> executedCommands = Lists.newArrayList();
+
+  public CrawlingCycle(CrawlingCycleListener listener,
+      RemoteCommandExecutor executor, Crawl crawl, List<RemoteCommand> commands) {
+    this.listener = listener;
+    this.executor = executor;
+    this.crawl = crawl;
+    this.remoteCommands = commands;
+  }
+
+  public synchronized void executeCrawlCycle() {
+    listener.crawlingStarted(crawl);
+
+    for (RemoteCommand command : remoteCommands) {
+      JobInfo jobInfo = executor.executeRemoteJob(command);
+      command.setJobInfo(jobInfo);
+
+      log.info("Executed remote command data: {}", command);
+
+      if (jobInfo.getState() == State.FAILED) {
+        listener.onCrawlError(crawl, jobInfo.getMsg());
+        return;
+      }
+
+      executedCommands.add(command);
+      listener.commandExecuted(crawl, command, calculateProgress());
+    }
+    listener.crawlingFinished(crawl);
+  }
+
+  private int calculateProgress() {
+    if (CollectionUtils.isEmpty(remoteCommands)) {
+      return 0;
+    }
+    return (int) ((float) executedCommands.size()
+        / (float) remoteCommands.size() * 100);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/CrawlingCycleListener.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/CrawlingCycleListener.java b/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/CrawlingCycleListener.java
new file mode 100644
index 0000000..c2abde5
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/CrawlingCycleListener.java
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webui.client.impl;
+
+import org.apache.nutch.webui.client.model.Crawl;
+
+/**
+ * Callbacks invoked by {@link CrawlingCycle} while it executes the remote
+ * commands of a crawl.
+ */
+public interface CrawlingCycleListener {
+
+  /**
+   * Called once before the first remote command is executed.
+   *
+   * @param crawl
+   *          crawl being executed
+   */
+  void crawlingStarted(Crawl crawl);
+
+  /**
+   * Called when a remote job ended in the failed state; the cycle stops
+   * after this call.
+   *
+   * @param crawl
+   *          crawl being executed
+   * @param msg
+   *          message taken from the failed job's info
+   */
+  void onCrawlError(Crawl crawl, String msg);
+
+  /**
+   * Called after each remote command that did not fail.
+   *
+   * @param crawl
+   *          crawl being executed
+   * @param command
+   *          command that was just executed
+   * @param progress
+   *          executed commands as a percentage of all commands
+   */
+  void commandExecuted(Crawl crawl, RemoteCommand command, int progress);
+
+  /**
+   * Called once after all remote commands were executed without failure.
+   *
+   * @param crawl
+   *          crawl being executed
+   */
+  void crawlingFinished(Crawl crawl);
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/NutchClientImpl.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/NutchClientImpl.java b/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/NutchClientImpl.java
new file mode 100644
index 0000000..1a577f9
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/NutchClientImpl.java
@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webui.client.impl;
+
+import static javax.ws.rs.core.MediaType.APPLICATION_JSON;
+
+import java.util.Map;
+
+import org.apache.nutch.webui.client.NutchClient;
+import org.apache.nutch.webui.client.model.ConnectionStatus;
+import org.apache.nutch.webui.client.model.JobConfig;
+import org.apache.nutch.webui.client.model.JobInfo;
+import org.apache.nutch.webui.client.model.NutchStatus;
+import org.apache.nutch.webui.model.NutchInstance;
+import org.apache.nutch.webui.model.SeedList;
+
+import com.sun.jersey.api.client.Client;
+import com.sun.jersey.api.client.WebResource;
+import com.sun.jersey.api.client.config.ClientConfig;
+import com.sun.jersey.api.client.config.DefaultClientConfig;
+import com.sun.jersey.api.json.JSONConfiguration;
+
+/**
+ * {@link NutchClient} that talks to a Nutch instance through a Jersey client
+ * with JSON POJO mapping enabled. All requests are issued relative to the
+ * URL of the {@link NutchInstance} given at construction time.
+ */
+public class NutchClientImpl implements NutchClient {
+  private Client client;
+  private WebResource nutchResource;
+  private NutchInstance instance;
+
+  public NutchClientImpl(NutchInstance instance) {
+    this.instance = instance;
+    createClient();
+  }
+
+  /**
+   * (Re)creates the Jersey client and the root web resource for the
+   * instance URL.
+   */
+  public void createClient() {
+    ClientConfig jerseyConfig = new DefaultClientConfig();
+    Map<String, Boolean> features = jerseyConfig.getFeatures();
+    features.put(JSONConfiguration.FEATURE_POJO_MAPPING, true);
+    this.client = Client.create(jerseyConfig);
+    this.nutchResource = client.resource(instance.getUrl());
+  }
+
+  /** Requests the {@code /admin} resource and maps it to a status object. */
+  @Override
+  public NutchStatus getNutchStatus() {
+    WebResource adminResource = nutchResource.path("/admin");
+    return adminResource.type(APPLICATION_JSON).get(NutchStatus.class);
+  }
+
+  /**
+   * Performs a status request and reports the instance as connected when
+   * that request returns; a failing request propagates its exception.
+   */
+  @Override
+  public ConnectionStatus getConnectionStatus() {
+    // TODO implement disconnected status
+    getNutchStatus();
+    return ConnectionStatus.CONNECTED;
+  }
+
+  /** Posts the job configuration to {@code /job/create}; returns the job id. */
+  @Override
+  public String executeJob(JobConfig jobConfig) {
+    WebResource createJobResource = nutchResource.path("/job/create");
+    JobInfo createdJob = createJobResource.type(APPLICATION_JSON).post(
+        JobInfo.class, jobConfig);
+    return createdJob.getId();
+  }
+
+  /** Requests {@code /job/<jobId>} and maps the answer to a job info. */
+  @Override
+  public JobInfo getJobInfo(String jobId) {
+    WebResource jobResource = nutchResource.path("/job/" + jobId);
+    return jobResource.type(APPLICATION_JSON).get(JobInfo.class);
+  }
+
+  @Override
+  public NutchInstance getNutchInstance() {
+    return instance;
+  }
+
+  /** Requests {@code /config/<config>} and returns it as a map. */
+  @SuppressWarnings("unchecked")
+  @Override
+  public Map<String, String> getNutchConfig(String config) {
+    WebResource configResource = nutchResource.path("/config/" + config);
+    return configResource.type(APPLICATION_JSON).get(Map.class);
+  }
+
+  /** Posts the seed list to {@code /seed/create}; returns the response body. */
+  @Override
+  public String createSeed(SeedList seedList) {
+    WebResource seedResource = nutchResource.path("/seed/create");
+    return seedResource.type(APPLICATION_JSON).post(String.class, seedList);
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/RemoteCommand.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/RemoteCommand.java b/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/RemoteCommand.java
new file mode 100644
index 0000000..ea19a8a
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/RemoteCommand.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webui.client.impl;
+
+import java.io.Serializable;
+import java.text.MessageFormat;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.nutch.webui.client.model.JobConfig;
+import org.apache.nutch.webui.client.model.JobInfo;
+import org.joda.time.Duration;
+
+/**
+ * A job to be run on a remote Nutch instance: the job configuration, the
+ * latest known job info and the time allowed for the job to finish.
+ */
+public class RemoteCommand implements Serializable {
+  private JobConfig jobConfig;
+  private JobInfo jobInfo = new JobInfo();
+  private Duration timeout;
+
+  /**
+   * Use {@link RemoteCommandBuilder} instead
+   */
+  @SuppressWarnings("unused")
+  private RemoteCommand() {
+  }
+
+  public RemoteCommand(JobConfig jobConfig) {
+    this.jobConfig = jobConfig;
+  }
+
+  public JobConfig getJobConfig() {
+    return jobConfig;
+  }
+
+  public void setJobConfig(JobConfig jobConfig) {
+    this.jobConfig = jobConfig;
+  }
+
+  public JobInfo getJobInfo() {
+    return jobInfo;
+  }
+
+  public void setJobInfo(JobInfo jobInfo) {
+    this.jobInfo = jobInfo;
+  }
+
+  public Duration getTimeout() {
+    return timeout;
+  }
+
+  public void setTimeout(Duration timeout) {
+    this.timeout = timeout;
+  }
+
+  /**
+   * Renders the job type together with the state of the job info. Neither a
+   * missing job configuration nor a missing job info makes this method
+   * throw, so the command can always be logged.
+   *
+   * @return text of the form {@code "<type> status: <state>"}
+   */
+  @Override
+  public String toString() {
+    // jobConfig is null after the no-arg constructor or setJobConfig(null).
+    Object jobType = jobConfig == null ? null : jobConfig.getType();
+    String statusInfo = StringUtils.EMPTY;
+    if (jobInfo != null) {
+      statusInfo = MessageFormat.format("{0}", jobInfo.getState());
+    }
+    return MessageFormat.format("{0} status: {1}", jobType, statusInfo);
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/RemoteCommandBuilder.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/RemoteCommandBuilder.java b/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/RemoteCommandBuilder.java
new file mode 100644
index 0000000..d6b1767
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/RemoteCommandBuilder.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webui.client.impl;
+
+import org.apache.nutch.webui.client.model.JobConfig;
+import org.apache.nutch.webui.client.model.JobInfo.JobType;
+import org.joda.time.Duration;
+
+/**
+ * Fluent builder for {@link RemoteCommand}s. A builder is obtained through
+ * {@link #instance(JobType)}; commands get a timeout of ten seconds unless
+ * another one is set.
+ */
+public class RemoteCommandBuilder {
+  private JobConfig jobConfig = new JobConfig();
+  private Duration timeout = Duration.standardSeconds(10);
+
+  private RemoteCommandBuilder() {
+  }
+
+  /**
+   * @param jobType
+   *          type of the job the command will run
+   * @return new builder preconfigured with the job type
+   */
+  public static RemoteCommandBuilder instance(JobType jobType) {
+    RemoteCommandBuilder builder = new RemoteCommandBuilder();
+    return builder.withJobType(jobType);
+  }
+
+  /** Sets the job type; returns this builder. */
+  public RemoteCommandBuilder withJobType(JobType jobType) {
+    jobConfig.setType(jobType);
+    return this;
+  }
+
+  /** Sets the configuration id; returns this builder. */
+  public RemoteCommandBuilder withConfigId(String configId) {
+    jobConfig.setConfId(configId);
+    return this;
+  }
+
+  /** Sets the crawl id; returns this builder. */
+  public RemoteCommandBuilder withCrawlId(String crawlId) {
+    jobConfig.setCrawlId(crawlId);
+    return this;
+  }
+
+  /** Sets a single job argument; returns this builder. */
+  public RemoteCommandBuilder withArgument(String key, String value) {
+    jobConfig.setArgument(key, value);
+    return this;
+  }
+
+  /** Replaces the default timeout; returns this builder. */
+  public RemoteCommandBuilder withTimeout(Duration timeout) {
+    this.timeout = timeout;
+    return this;
+  }
+
+  /**
+   * @return command carrying this builder's job configuration and timeout
+   */
+  public RemoteCommand build() {
+    RemoteCommand command = new RemoteCommand(jobConfig);
+    command.setTimeout(timeout);
+    return command;
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/RemoteCommandExecutor.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/RemoteCommandExecutor.java b/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/RemoteCommandExecutor.java
new file mode 100644
index 0000000..e1eefc2
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/RemoteCommandExecutor.java
@@ -0,0 +1,110 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webui.client.impl;
+
+import static com.google.common.base.Preconditions.checkState;
+
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.lang3.exception.ExceptionUtils;
+import org.apache.nutch.webui.client.NutchClient;
+import org.apache.nutch.webui.client.model.JobInfo;
+import org.apache.nutch.webui.client.model.JobInfo.State;
+import org.joda.time.DateTimeConstants;
+import org.joda.time.Duration;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class executes remote job and waits for success/failure result.
+ * <p>
+ * The job is submitted through the {@link NutchClient}; its state is then
+ * polled on a single background thread until it leaves the running states or
+ * the command's timeout elapses. Any failure is reported as a
+ * {@link JobInfo} in state {@link State#FAILED}, never as an exception or a
+ * {@code null} result.
+ * 
+ * @author feodor
+ * 
+ */
+public class RemoteCommandExecutor {
+  private Logger log = LoggerFactory.getLogger(RemoteCommandExecutor.class);
+
+  private static final int DEFAULT_TIMEOUT_SEC = 60;
+  private Duration requestDelay = new Duration(500);
+
+  private NutchClient client;
+  private ExecutorService executor;
+
+  public RemoteCommandExecutor(NutchClient client) {
+    this.client = client;
+    this.executor = Executors.newSingleThreadExecutor();
+  }
+
+  /**
+   * Submits the command's job and waits for its final state.
+   *
+   * @param command
+   *          command describing the job and its timeout
+   * @return info of the finished job, or a synthetic job info in state
+   *         {@code FAILED} whose message holds the stack trace when
+   *         submitting, polling or waiting failed or timed out
+   */
+  public JobInfo executeRemoteJob(RemoteCommand command) {
+    Future<JobInfo> checkerFuture = null;
+    try {
+      String jobId = client.executeJob(command.getJobConfig());
+      checkerFuture = executor.submit(new JobStateChecker(jobId));
+      JobInfo jobInfo = checkerFuture.get(getTimeout(command),
+          TimeUnit.MILLISECONDS);
+      // The checker yields null when its thread was interrupted; report that
+      // as a failure rather than handing null to the caller.
+      checkState(jobInfo != null, "Job state polling was interrupted!");
+      return jobInfo;
+    } catch (Exception e) {
+      if (e instanceof InterruptedException) {
+        // Keep the interrupt visible to the code that called us.
+        Thread.currentThread().interrupt();
+      }
+      log.error("Remote command failed", e);
+      JobInfo jobInfo = new JobInfo();
+      jobInfo.setState(State.FAILED);
+      jobInfo.setMsg(ExceptionUtils.getStackTrace(e));
+      return jobInfo;
+    } finally {
+      // Stop a checker that is still polling (e.g. after a timeout);
+      // otherwise it would occupy the single worker thread forever and
+      // block every following job. No effect on a completed future.
+      if (checkerFuture != null) {
+        checkerFuture.cancel(true);
+      }
+    }
+  }
+
+  /**
+   * @return the command's timeout in milliseconds, or the default of
+   *         {@value #DEFAULT_TIMEOUT_SEC} seconds when none is set
+   */
+  private long getTimeout(RemoteCommand command) {
+    if (command.getTimeout() == null) {
+      return DEFAULT_TIMEOUT_SEC * DateTimeConstants.MILLIS_PER_SECOND;
+    }
+    return command.getTimeout().getMillis();
+  }
+
+  /**
+   * @param requestDelay
+   *          pause between two job state requests
+   */
+  public void setRequestDelay(Duration requestDelay) {
+    this.requestDelay = requestDelay;
+  }
+
+  /**
+   * Polls the job info of one job until the job is no longer in one of the
+   * states {@code RUNNING}, {@code ANY} or {@code IDLE}.
+   */
+  public class JobStateChecker implements Callable<JobInfo> {
+
+    private String jobId;
+
+    public JobStateChecker(String jobId) {
+      this.jobId = jobId;
+    }
+
+    /**
+     * @return info of the finished job, or {@code null} when the polling
+     *         thread was interrupted between two requests
+     * @throws Exception
+     *           if the job info or its state is missing, the request fails
+     *           or the thread is interrupted while sleeping
+     */
+    @Override
+    public JobInfo call() throws Exception {
+      while (!Thread.interrupted()) {
+        JobInfo jobInfo = client.getJobInfo(jobId);
+        checkState(jobInfo != null, "Cannot get job info!");
+
+        State state = jobInfo.getState();
+        checkState(state != null, "Unknown job state!");
+
+        if (state == State.RUNNING || state == State.ANY || state == State.IDLE) {
+          Thread.sleep(requestDelay.getMillis());
+          continue;
+        }
+
+        return jobInfo;
+      }
+      return null;
+    }
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/RemoteCommandsBatchFactory.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/RemoteCommandsBatchFactory.java b/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/RemoteCommandsBatchFactory.java
new file mode 100644
index 0000000..cef56a5
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/client/impl/RemoteCommandsBatchFactory.java
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webui.client.impl;
+
+import java.util.List;
+import java.util.UUID;
+
+import org.apache.nutch.webui.client.model.Crawl;
+import org.apache.nutch.webui.client.model.JobInfo.JobType;
+import org.joda.time.Duration;
+import org.springframework.beans.factory.config.BeanDefinition;
+import org.springframework.context.annotation.Scope;
+import org.springframework.stereotype.Component;
+
+import com.google.common.collect.Lists;
+
+/** Creates the command sequence of a crawl: INJECT, then one batch per round. */
+@Component
+@Scope(BeanDefinition.SCOPE_PROTOTYPE)
+public class RemoteCommandsBatchFactory {
+  private List<RemoteCommand> remoteCommands;
+  private Crawl crawl;
+  // Regenerated for every round so that each batch of commands shares one id.
+  private String batchId;
+
+  /**
+   * Returns the INJECT command followed by one batch of commands per round of
+   * {@code crawl}; the crawl and the list are also kept in this instance.
+   */
+  public List<RemoteCommand> createCommands(Crawl crawl) {
+    this.crawl = crawl;
+    this.remoteCommands = Lists.newArrayList();
+    this.remoteCommands.add(inject());
+    for (int round = 0; round < crawl.getNumberOfRounds(); round++) {
+      this.remoteCommands.addAll(createBatchCommands());
+    }
+    return this.remoteCommands;
+  }
+
+  // One round: GENERATE, FETCH, PARSE, UPDATEDB, INDEX under a fresh batch id.
+  private List<RemoteCommand> createBatchCommands() {
+    this.batchId = UUID.randomUUID().toString();
+    List<RemoteCommand> batch = Lists.newArrayList();
+    batch.add(createGenerateCommand());
+    batch.add(createFetchCommand());
+    batch.add(createParseCommand());
+    batch.add(createUpdateDbCommand());
+    batch.add(createIndexCommand());
+    return batch;
+  }
+
+  // INJECT carries the crawl id and the seed directory as "url_dir".
+  private RemoteCommand inject() {
+    return RemoteCommandBuilder.instance(JobType.INJECT)
+        .withCrawlId(crawl.getCrawlId())
+        .withArgument("url_dir", crawl.getSeedDirectory()).build();
+  }
+
+  private RemoteCommand createGenerateCommand() {
+    return createBuilder(JobType.GENERATE).build();
+  }
+
+  // FETCH is the only command given an explicit timeout (50 seconds).
+  private RemoteCommand createFetchCommand() {
+    RemoteCommandBuilder fetch = createBuilder(JobType.FETCH);
+    return fetch.withTimeout(Duration.standardSeconds(50)).build();
+  }
+  private RemoteCommand createParseCommand() {
+    return createBuilder(JobType.PARSE).build();
+  }
+  private RemoteCommand createIndexCommand() {
+    return createBuilder(JobType.INDEX).build();
+  }
+  private RemoteCommand createUpdateDbCommand() {
+    return createBuilder(JobType.UPDATEDB).build();
+  }
+
+  // Common starting point: job type, crawl id and the current "batch" id.
+  private RemoteCommandBuilder createBuilder(JobType jobType) {
+    return RemoteCommandBuilder.instance(jobType)
+        .withCrawlId(crawl.getCrawlId()).withArgument("batch", batchId);
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/client/model/ConnectionStatus.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/client/model/ConnectionStatus.java b/nutch-core/src/main/java/org/apache/nutch/webui/client/model/ConnectionStatus.java
new file mode 100644
index 0000000..d834612
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/client/model/ConnectionStatus.java
@@ -0,0 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webui.client.model;
+
+public enum ConnectionStatus {
+  CONNECTING, CONNECTED, DISCONNECTED // no further members, so no ';' needed
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/client/model/Crawl.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/client/model/Crawl.java b/nutch-core/src/main/java/org/apache/nutch/webui/client/model/Crawl.java
new file mode 100644
index 0000000..6057f7f
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/client/model/Crawl.java
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webui.client.model;
+
+import java.io.Serializable;
+
+import javax.persistence.Column;
+import javax.persistence.Entity;
+import javax.persistence.GeneratedValue;
+import javax.persistence.Id;
+
+import org.apache.nutch.webui.model.SeedList;
+
+import com.j256.ormlite.field.DatabaseField;
+
+/** Persisted crawl definition: ids, name, status, rounds, seeds and progress. */
+@Entity
+public class Crawl implements Serializable {
+  public enum CrawlStatus {
+    NEW, CRAWLING, FINISHED, ERROR
+  }
+
+  @Id
+  @GeneratedValue
+  private Long id;
+
+  @Column
+  private String crawlId;
+
+  @Column
+  private String crawlName;
+
+  @Column
+  private CrawlStatus status = CrawlStatus.NEW;
+
+  @Column
+  private Integer numberOfRounds = 1;
+
+  @Column
+  @DatabaseField(foreign = true, foreignAutoRefresh = true)
+  private SeedList seedList;
+
+  @Column
+  private String seedDirectory;
+
+  @Column
+  private int progress;
+
+  // Generated database identity (@Id, @GeneratedValue).
+  public Long getId() {
+    return this.id;
+  }
+  public void setId(Long id) {
+    this.id = id;
+  }
+
+  // Identifier of the crawl.
+  public String getCrawlId() {
+    return this.crawlId;
+  }
+  public void setCrawlId(String crawlId) {
+    this.crawlId = crawlId;
+  }
+
+  // Name of the crawl.
+  public String getCrawlName() {
+    return this.crawlName;
+  }
+  public void setCrawlName(String crawlName) {
+    this.crawlName = crawlName;
+  }
+
+  // Current status; a new instance starts as CrawlStatus.NEW.
+  public CrawlStatus getStatus() {
+    return this.status;
+  }
+  public void setStatus(CrawlStatus status) {
+    this.status = status;
+  }
+
+  // Number of rounds; a new instance starts with 1.
+  public Integer getNumberOfRounds() {
+    return this.numberOfRounds;
+  }
+  public void setNumberOfRounds(Integer numberOfRounds) {
+    this.numberOfRounds = numberOfRounds;
+  }
+
+  // Associated seed list (foreign, auto-refreshed ORMLite field).
+  public SeedList getSeedList() {
+    return this.seedList;
+  }
+  public void setSeedList(SeedList seedList) {
+    this.seedList = seedList;
+  }
+
+  // Seed directory of the crawl.
+  public String getSeedDirectory() {
+    return this.seedDirectory;
+  }
+  public void setSeedDirectory(String seedDirectory) {
+    this.seedDirectory = seedDirectory;
+  }
+
+  // Progress value; its unit is not defined in this class.
+  public int getProgress() {
+    return this.progress;
+  }
+  public void setProgress(int progress) {
+    this.progress = progress;
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/client/model/JobConfig.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/client/model/JobConfig.java b/nutch-core/src/main/java/org/apache/nutch/webui/client/model/JobConfig.java
new file mode 100644
index 0000000..80df279
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/client/model/JobConfig.java
@@ -0,0 +1,77 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.nutch.webui.client.model;
+
+import java.io.Serializable;
+import java.util.Collections;
+import java.util.Map;
+
+import org.apache.nutch.webui.client.model.JobInfo.JobType;
+
+import com.google.common.collect.Maps;
+
+/** Settings of a job: crawl id, job type, configuration id, class name, arguments. */
+public class JobConfig implements Serializable {
+  private String crawlId;
+  private JobType type;
+  private String confId = "default";
+  private String jobClassName;
+  private Map<String, Object> args = Maps.newHashMap();
+
+  /** Adds or replaces a single argument. */
+  public void setArgument(String key, String value) {
+    args.put(key, value);
+  }
+
+  /** Returns a read-only view of the arguments; never {@code null}. */
+  public Map<String, Object> getArgs() {
+    return Collections.unmodifiableMap(args);
+  }
+
+  /**
+   * Replaces all arguments with a private copy of {@code args}; {@code null}
+   * clears them, so getArgs() and setArgument() keep working afterwards.
+   */
+  public void setArgs(Map<String, Object> args) {
+    this.args = args == null ? Maps.<String, Object> newHashMap() : Maps.newHashMap(args);
+  }
+
+  public String getCrawlId() {
+    return crawlId;
+  }
+  public void setCrawlId(String crawlId) {
+    this.crawlId = crawlId;
+  }
+  public JobType getType() {
+    return type;
+  }
+  public void setType(JobType type) {
+    this.type = type;
+  }
+  public String getConfId() {
+    return confId;
+  }
+  public void setConfId(String confId) {
+    this.confId = confId;
+  }
+  public String getJobClassName() {
+    return jobClassName;
+  }
+  public void setJobClassName(String jobClass) {
+    this.jobClassName = jobClass;
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/client/model/JobInfo.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/client/model/JobInfo.java b/nutch-core/src/main/java/org/apache/nutch/webui/client/model/JobInfo.java
new file mode 100644
index 0000000..312118a
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/client/model/JobInfo.java
@@ -0,0 +1,104 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.nutch.webui.client.model;
+
+import java.io.Serializable;
+import java.util.Map;
+
+/** Plain data holder describing a job: ids, type, arguments, result, state, message. */
+public class JobInfo implements Serializable {
+  public enum JobType {
+    INJECT, GENERATE, FETCH, PARSE, UPDATEDB, INDEX, READDB, CLASS
+  }
+
+  public enum State {
+    IDLE, RUNNING, FINISHED, FAILED, KILLED, STOPPING, KILLING, ANY
+  }
+
+  private String id;
+  private String type;
+  private String confId;
+  private Map<String, Object> args;
+  private Map<String, Object> result;
+  private State state;
+  private String msg;
+  private String crawlId;
+
+  // Job id.
+  public String getId() {
+    return this.id;
+  }
+  public void setId(String id) {
+    this.id = id;
+  }
+
+  // Job type, kept as a plain string rather than a JobType constant.
+  public String getType() {
+    return this.type;
+  }
+  public void setType(String type) {
+    this.type = type;
+  }
+
+  // Configuration id.
+  public String getConfId() {
+    return this.confId;
+  }
+  public void setConfId(String confId) {
+    this.confId = confId;
+  }
+
+  // Job arguments; the map is stored and returned without copying.
+  public Map<String, Object> getArgs() {
+    return this.args;
+  }
+  public void setArgs(Map<String, Object> args) {
+    this.args = args;
+  }
+
+  // Job result; the map is stored and returned without copying.
+  public Map<String, Object> getResult() {
+    return this.result;
+  }
+  public void setResult(Map<String, Object> result) {
+    this.result = result;
+  }
+
+  // Job state.
+  public State getState() {
+    return this.state;
+  }
+  public void setState(State state) {
+    this.state = state;
+  }
+
+  // Message attached to the job.
+  public String getMsg() {
+    return this.msg;
+  }
+  public void setMsg(String msg) {
+    this.msg = msg;
+  }
+
+  // Crawl id.
+  public String getCrawlId() {
+    return this.crawlId;
+  }
+  public void setCrawlId(String crawlId) {
+    this.crawlId = crawlId;
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/client/model/NutchStatus.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/client/model/NutchStatus.java b/nutch-core/src/main/java/org/apache/nutch/webui/client/model/NutchStatus.java
new file mode 100644
index 0000000..0c5c425
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/client/model/NutchStatus.java
@@ -0,0 +1,62 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.nutch.webui.client.model;
+
+import java.io.Serializable;
+import java.util.Collection;
+import java.util.Date;
+import java.util.Set;
+
+/** Status data holder: start date, configuration set, jobs and running jobs. */
+public class NutchStatus implements Serializable {
+  private Date startDate;
+  private Set<String> configuration;
+  private Collection<JobInfo> jobs;
+  private Collection<JobInfo> runningJobs;
+
+  // Start date; the Date object is stored and returned without copying.
+  public Date getStartDate() {
+    return this.startDate;
+  }
+  public void setStartDate(Date startDate) {
+    this.startDate = startDate;
+  }
+
+  // Configuration entries as a set of strings.
+  public Set<String> getConfiguration() {
+    return this.configuration;
+  }
+  public void setConfiguration(Set<String> configuration) {
+    this.configuration = configuration;
+  }
+
+  // Jobs held by this status object.
+  public Collection<JobInfo> getJobs() {
+    return this.jobs;
+  }
+  public void setJobs(Collection<JobInfo> jobs) {
+    this.jobs = jobs;
+  }
+
+  // Running jobs, kept in a collection separate from getJobs().
+  public Collection<JobInfo> getRunningJobs() {
+    return this.runningJobs;
+  }
+  public void setRunningJobs(Collection<JobInfo> runningJobs) {
+    this.runningJobs = runningJobs;
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/config/CustomDaoFactory.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/config/CustomDaoFactory.java b/nutch-core/src/main/java/org/apache/nutch/webui/config/CustomDaoFactory.java
new file mode 100644
index 0000000..09c2d6a
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/config/CustomDaoFactory.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webui.config;
+
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import com.j256.ormlite.dao.Dao;
+import com.j256.ormlite.spring.DaoFactory;
+import com.j256.ormlite.support.ConnectionSource;
+
+/** Creates ORMLite DAOs on one connection source and remembers each of them. */
+public class CustomDaoFactory {
+  private final ConnectionSource connectionSource;
+  private final List<Dao<?, ?>> registeredDaos = Collections
+      .synchronizedList(new ArrayList<Dao<?, ?>>());
+
+  public CustomDaoFactory(ConnectionSource connectionSource) {
+    this.connectionSource = connectionSource;
+  }
+
+  /** Creates and records a DAO for {@code clazz}; SQL errors become unchecked. */
+  public <T, ID> Dao<T, ID> createDao(Class<T> clazz) {
+    try {
+      Dao<T, ID> dao = DaoFactory.createDao(connectionSource, clazz);
+      registeredDaos.add(dao); // the synchronized list locks internally
+      return dao;
+    } catch (SQLException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Returns a read-only snapshot of the DAOs created so far, copied under the
+   * list's lock so callers never iterate a list that is still being added to.
+   */
+  public List<Dao<?, ?>> getCreatedDaos() {
+    synchronized (registeredDaos) {
+      return Collections.unmodifiableList(new ArrayList<Dao<?, ?>>(registeredDaos));
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/config/CustomTableCreator.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/config/CustomTableCreator.java b/nutch-core/src/main/java/org/apache/nutch/webui/config/CustomTableCreator.java
new file mode 100644
index 0000000..9b31d73
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/config/CustomTableCreator.java
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webui.config;
+
+import java.sql.SQLException;
+import java.util.List;
+
+import com.j256.ormlite.dao.BaseDaoImpl;
+import com.j256.ormlite.dao.Dao;
+import com.j256.ormlite.support.ConnectionSource;
+import com.j256.ormlite.table.DatabaseTableConfig;
+import com.j256.ormlite.table.TableUtils;
+
+/** Creates the missing database tables for a list of DAOs upon construction. */
+public class CustomTableCreator {
+  private ConnectionSource connectionSource;
+  private List<Dao<?, ?>> configuredDaos;
+
+  /**
+   * Stores both arguments and immediately creates the table of every given
+   * DAO unless that table already exists.
+   * @throws IllegalStateException if {@code configuredDaos} is {@code null}
+   * @throws RuntimeException wrapping any SQLException raised on the way
+   */
+  public CustomTableCreator(ConnectionSource connectionSource,
+      List<Dao<?, ?>> configuredDaos) {
+    this.connectionSource = connectionSource;
+    this.configuredDaos = configuredDaos;
+    initialize();
+  }
+
+  private void initialize() {
+    if (configuredDaos == null) {
+      throw new IllegalStateException("configuredDaos was not set in "
+          + getClass().getSimpleName());
+    }
+    for (Dao<?, ?> dao : configuredDaos) {
+      createTableIfNotExists(getTableConfig(dao));
+    }
+  }
+
+  // Prefers the config held by a BaseDaoImpl; otherwise derives it from the class.
+  private DatabaseTableConfig<?> getTableConfig(Dao<?, ?> dao) {
+    Class<?> dataClass = dao.getDataClass();
+    if (dao instanceof BaseDaoImpl) {
+      DatabaseTableConfig<?> held = ((BaseDaoImpl<?, ?>) dao).getTableConfig();
+      if (held != null) {
+        return held;
+      }
+    }
+    return getConfigFromClass(dataClass);
+  }
+
+  private DatabaseTableConfig<?> getConfigFromClass(Class<?> clazz) {
+    try {
+      return DatabaseTableConfig.fromClass(connectionSource, clazz);
+    } catch (SQLException sqlFailure) {
+      throw new RuntimeException(sqlFailure);
+    }
+  }
+
+  private void createTableIfNotExists(DatabaseTableConfig<?> tableConfig) {
+    try {
+      TableUtils.createTableIfNotExists(connectionSource, tableConfig);
+    } catch (SQLException sqlFailure) {
+      throw new RuntimeException(sqlFailure);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/config/NutchGuiConfiguration.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/config/NutchGuiConfiguration.java b/nutch-core/src/main/java/org/apache/nutch/webui/config/NutchGuiConfiguration.java
new file mode 100644
index 0000000..8b76440
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/config/NutchGuiConfiguration.java
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webui.config;
+
+import java.util.List;
+
+import org.apache.nutch.webui.model.NutchInstance;
+
+/** Holder for a list of Nutch instances; the list is stored without copying. */
+public class NutchGuiConfiguration {
+  private List<NutchInstance> instances;
+
+  public List<NutchInstance> getInstances() {
+    return this.instances;
+  }
+  public void setInstances(List<NutchInstance> instances) {
+    this.instances = instances;
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/webui/config/SpringConfiguration.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/webui/config/SpringConfiguration.java b/nutch-core/src/main/java/org/apache/nutch/webui/config/SpringConfiguration.java
new file mode 100644
index 0000000..1687cee
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/webui/config/SpringConfiguration.java
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webui.config;
+
+import java.sql.SQLException;
+import java.util.concurrent.Executor;
+
+import org.apache.nutch.webui.client.model.Crawl;
+import org.apache.nutch.webui.model.NutchInstance;
+import org.apache.nutch.webui.model.SeedList;
+import org.apache.nutch.webui.model.SeedUrl;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.scheduling.annotation.AsyncConfigurer;
+import org.springframework.scheduling.annotation.EnableAsync;
+import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
+
+import com.j256.ormlite.dao.Dao;
+import com.j256.ormlite.db.H2DatabaseType;
+import com.j256.ormlite.jdbc.JdbcConnectionSource;
+
+/** Spring wiring of the web UI: async executor, H2 connection source, DAOs, tables. */
+@Configuration
+@EnableAsync
+public class SpringConfiguration implements AsyncConfigurer {
+  // Not a @Bean method, so the executor is initialized explicitly before use.
+  @Override
+  public Executor getAsyncExecutor() {
+    // TODO move magic numbers to properties file
+    ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
+    pool.setThreadNamePrefix("SpringExecutor-");
+    pool.setCorePoolSize(7);
+    pool.setMaxPoolSize(42);
+    pool.setQueueCapacity(11);
+    pool.initialize();
+    return pool;
+  }
+
+  // H2 database addressed by the JDBC URL jdbc:h2:~/.nutch/config.
+  @Bean
+  public JdbcConnectionSource getConnectionSource() throws SQLException {
+    String url = "jdbc:h2:~/.nutch/config";
+    JdbcConnectionSource h2Source = new JdbcConnectionSource(url, new H2DatabaseType());
+    h2Source.initialize();
+    return h2Source;
+  }
+
+  @Bean
+  public CustomDaoFactory getDaoFactory() throws SQLException {
+    return new CustomDaoFactory(this.getConnectionSource());
+  }
+
+  // One DAO bean per persisted class, each created through the DAO factory bean.
+  @Bean
+  public Dao<NutchInstance, Long> createNutchDao() throws SQLException {
+    return this.getDaoFactory().createDao(NutchInstance.class);
+  }
+  @Bean
+  public Dao<SeedList, Long> createSeedListDao() throws SQLException {
+    return this.getDaoFactory().createDao(SeedList.class);
+  }
+  @Bean
+  public Dao<SeedUrl, Long> createSeedUrlDao() throws SQLException {
+    return this.getDaoFactory().createDao(SeedUrl.class);
+  }
+  @Bean
+  public Dao<Crawl, Long> createCrawlDao() throws SQLException {
+    return this.getDaoFactory().createDao(Crawl.class);
+  }
+
+  // Creates tables for the DAOs the factory has produced when this bean is built.
+  @Bean
+  public CustomTableCreator createTableCreator() throws SQLException {
+    return new CustomTableCreator(this.getConnectionSource(),
+        this.getDaoFactory().getCreatedDaos());
+  }
+}