You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by ol...@apache.org on 2008/11/06 13:30:36 UTC

svn commit: r711854 - in /incubator/droids/trunk/src: java/ java/org/apache/droids/api/ java/org/apache/droids/exception/ java/org/apache/droids/impl/ java/org/apache/droids/parse/html/ java/org/apache/droids/robot/crawler/ test/java/org/apache/droids/...

Author: olegk
Date: Thu Nov  6 05:30:18 2008
New Revision: 711854

URL: http://svn.apache.org/viewvc?rev=711854&view=rev
Log:
* Refactored exception handling code in HtmlParser
* Replaced URL calls with equivalent URI calls in HtmlParser
* Added SequentialTaskMaster intended to execute Tasks sequentially one at a time
* Added test case to test termination of Droid execution in case of an exception

Added:
    incubator/droids/trunk/src/java/org/apache/droids/exception/ContentFormatViolationException.java
    incubator/droids/trunk/src/java/org/apache/droids/exception/InvalidLinkException.java
    incubator/droids/trunk/src/java/org/apache/droids/impl/SequentialTaskMaster.java
    incubator/droids/trunk/src/test/java/org/apache/droids/DroidsFactory.java
Modified:
    incubator/droids/trunk/src/java/log4j.properties
    incubator/droids/trunk/src/java/org/apache/droids/api/Parser.java
    incubator/droids/trunk/src/java/org/apache/droids/parse/html/HtmlParser.java
    incubator/droids/trunk/src/java/org/apache/droids/robot/crawler/CrawlingWorker.java
    incubator/droids/trunk/src/test/java/org/apache/droids/impl/TestSimpleDroid.java

Modified: incubator/droids/trunk/src/java/log4j.properties
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/src/java/log4j.properties?rev=711854&r1=711853&r2=711854&view=diff
==============================================================================
--- incubator/droids/trunk/src/java/log4j.properties (original)
+++ incubator/droids/trunk/src/java/log4j.properties Thu Nov  6 05:30:18 2008
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-log4j.rootLogger=@loglevel@, stdout, logfile
+log4j.rootLogger=INFO, stdout, logfile
 
 log4j.appender.stdout=org.apache.log4j.ConsoleAppender
 log4j.appender.stdout.layout=org.apache.log4j.PatternLayout

Modified: incubator/droids/trunk/src/java/org/apache/droids/api/Parser.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/src/java/org/apache/droids/api/Parser.java?rev=711854&r1=711853&r2=711854&view=diff
==============================================================================
--- incubator/droids/trunk/src/java/org/apache/droids/api/Parser.java (original)
+++ incubator/droids/trunk/src/java/org/apache/droids/api/Parser.java Thu Nov  6 05:30:18 2008
@@ -16,8 +16,11 @@
  */
 package org.apache.droids.api;
 
+import java.io.IOException;
 import java.io.InputStream;
 
+import org.apache.droids.exception.DroidsException;
+
 /**
  * Simple parser that is only forcing to return a parse object.
  * 
@@ -35,5 +38,5 @@
    *                the link that correspond to the stream
    * @return the parse object
    */
-  Parse getParse(InputStream openStream, Link link);
+  Parse getParse(InputStream openStream, Link link) throws DroidsException, IOException;
 }

Added: incubator/droids/trunk/src/java/org/apache/droids/exception/ContentFormatViolationException.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/src/java/org/apache/droids/exception/ContentFormatViolationException.java?rev=711854&view=auto
==============================================================================
--- incubator/droids/trunk/src/java/org/apache/droids/exception/ContentFormatViolationException.java (added)
+++ incubator/droids/trunk/src/java/org/apache/droids/exception/ContentFormatViolationException.java Thu Nov  6 05:30:18 2008
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.exception;
+
+/**
+ * Signals that content being processed violates its expected format.
+ * Its only constructor takes the underlying cause together with a detail message.
+ * @version 1.0
+ */
+public class ContentFormatViolationException extends DroidsException {
+
+  private static final long serialVersionUID = -3897055120550880304L;
+  /** Creates the exception with a detail message and attaches {@code cause} via initCause. */
+  public ContentFormatViolationException(String message, Throwable cause) {
+    super(message);
+    initCause(cause); // presumably DroidsException has no (String, Throwable) constructor - confirm
+  }
+  
+}

Added: incubator/droids/trunk/src/java/org/apache/droids/exception/InvalidLinkException.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/src/java/org/apache/droids/exception/InvalidLinkException.java?rev=711854&view=auto
==============================================================================
--- incubator/droids/trunk/src/java/org/apache/droids/exception/InvalidLinkException.java (added)
+++ incubator/droids/trunk/src/java/org/apache/droids/exception/InvalidLinkException.java Thu Nov  6 05:30:18 2008
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.exception;
+
+/**
+ * Signals that a link violates the expected link format.
+ * 
+ * @version 1.0
+ */
+public class InvalidLinkException extends DroidsException {
+
+  private static final long serialVersionUID = 5608058374859478284L;
+  /** Creates the exception with a detail message and no cause. */
+  public InvalidLinkException(String message) {
+    super(message);
+  }
+  /** Creates the exception with a detail message and attaches {@code cause} via initCause. */
+  public InvalidLinkException(String message, Throwable cause) {
+    super(message);
+    initCause(cause);
+  }
+  
+}

Added: incubator/droids/trunk/src/java/org/apache/droids/impl/SequentialTaskMaster.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/src/java/org/apache/droids/impl/SequentialTaskMaster.java?rev=711854&view=auto
==============================================================================
--- incubator/droids/trunk/src/java/org/apache/droids/impl/SequentialTaskMaster.java (added)
+++ incubator/droids/trunk/src/java/org/apache/droids/impl/SequentialTaskMaster.java Thu Nov  6 05:30:18 2008
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.impl;
+
+import java.util.Date;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.droids.api.DelayTimer;
+import org.apache.droids.api.Droid;
+import org.apache.droids.api.Task;
+import org.apache.droids.api.TaskExceptionHandler;
+import org.apache.droids.api.TaskExceptionResult;
+import org.apache.droids.api.TaskMaster;
+import org.apache.droids.api.TaskQueue;
+import org.apache.droids.api.Worker;
+import org.apache.droids.helper.Loggable;
+
+/** Task master that executes queued tasks sequentially, one at a time, on the thread calling processAllTasks. */
+public class SequentialTaskMaster<T extends Task> extends Loggable implements TaskMaster<T>
+{
+  private final Object mutex; // monitor on which awaitTermination waits for completion
+  
+  private volatile boolean completed;
+  private volatile Date startedWorking = null;
+  private volatile Date finishedWorking = null;
+  private volatile int completedTask = 0;
+  private volatile T lastCompletedTask = null;
+  
+  private DelayTimer delayTimer = null; // optional pause applied before each task
+  private TaskExceptionHandler exHandler = null; // optional; without it every failure is treated as WARN
+
+  public SequentialTaskMaster() {
+    super();
+    this.mutex = new Object();
+  }
+  
+  /**
+   * Executes tasks from the queue one by one until it is empty or a failure is rated FATAL, then signals completion.
+   */
+  public synchronized void processAllTasks(
+      final TaskQueue<T> queue, final Droid<T> droid) 
+  {
+    this.completed = false;
+    this.startedWorking = new Date();
+    this.finishedWorking = null;
+    this.completedTask = 0;
+    
+    boolean terminated = false;
+    while( !terminated ) {
+      T task = queue.next();
+      if (task == null) {
+        break; // queue is exhausted
+      }
+      if( delayTimer != null ) {
+        long delay = delayTimer.getDelayMillis();
+        if( delay > 0 ) {
+          try {
+            Thread.sleep( delay );
+          } 
+          catch (InterruptedException e) {} // NOTE(review): interrupt is swallowed and the task still runs - confirm intended
+        }
+      }
+      Worker<T> worker = droid.getNewWorker();
+      try {
+        worker.execute( task );
+        completedTask++; // reached only when execute returned normally
+        lastCompletedTask = task;
+      } catch (Exception ex) {
+        TaskExceptionResult result = TaskExceptionResult.WARN;
+        if (exHandler != null) {
+          result = exHandler.handleException(ex);
+        }
+        switch (result) {
+        case WARN:
+          log.warn(ex.getMessage());
+          break;
+        case FATAL:
+          log.warn(ex.getMessage());
+          terminated = true; // stop here; tasks still in the queue are not executed
+          break;
+        }
+      }
+    }
+    finishedWorking = new Date();
+    droid.finished();
+    synchronized( mutex ) {
+      completed = true;
+      mutex.notifyAll(); // wake threads blocked in awaitTermination
+    }
+  }
+  /** Sets the handler that classifies task failures (WARN or FATAL are handled above); may be null. */
+  public final void setExceptionHandler(TaskExceptionHandler exHandler) {
+    this.exHandler = exHandler;
+  }
+  
+  /** Sets the timer that supplies the pause, in milliseconds, taken before each task; may be null. */
+  public final void setDelayTimer(DelayTimer delayTimer) {
+    this.delayTimer = delayTimer;
+  }
+
+  /** Returns when the current or most recent run began, or null if no run has started. */
+  public Date getStartTime() {
+    return startedWorking;
+  }
+  
+  /** Returns when the most recent run ended, or null if none has ended or one is in progress. */
+  public Date getFinishedWorking() {
+    return finishedWorking;
+  }
+
+  /** Returns how many tasks executed without an exception during the current or most recent run. */
+  public int getCompletedTasks() {
+    return completedTask;
+  }
+
+  /** Returns the last task whose execution returned normally, or null if there is none yet. */
+  public T getLastCompletedTask() {
+    return lastCompletedTask;
+  }
+
+  /** Blocks until processing has completed or the timeout elapses; a non-positive timeout waits indefinitely. */
+  public void awaitTermination(long timeout, TimeUnit unit) throws InterruptedException {
+    // Convert once to milliseconds; a positive timeout is rounded up to at least 1 ms so that
+    // sub-millisecond requests do not degrade into wait(0), which would block indefinitely.
+    final long millis = timeout > 0 ? Math.max(1L, unit.toMillis(timeout)) : 0L;
+    synchronized (this.mutex) {
+      final long start = System.currentTimeMillis();
+      long remaining = millis; // fix: was the raw timeout, i.e. caller units treated as milliseconds
+      while (!completed) {
+        this.mutex.wait(remaining);
+        if (millis > 0) {
+          remaining = millis - (System.currentTimeMillis() - start); // elapsed-based, cannot overflow
+          if (remaining <= 0) {
+            break; // timed out before completion was signalled
+          }
+        }
+      }
+    }
+  }
+  
+}

Modified: incubator/droids/trunk/src/java/org/apache/droids/parse/html/HtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/src/java/org/apache/droids/parse/html/HtmlParser.java?rev=711854&r1=711853&r2=711854&view=diff
==============================================================================
--- incubator/droids/trunk/src/java/org/apache/droids/parse/html/HtmlParser.java (original)
+++ incubator/droids/trunk/src/java/org/apache/droids/parse/html/HtmlParser.java Thu Nov  6 05:30:18 2008
@@ -15,10 +15,10 @@
  */
 package org.apache.droids.parse.html;
 
+import java.io.IOException;
 import java.io.InputStream;
-import java.net.MalformedURLException;
 import java.net.URI;
-import java.net.URL;
+import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -27,6 +27,9 @@
 import org.apache.droids.api.Link;
 import org.apache.droids.api.Parse;
 import org.apache.droids.api.Parser;
+import org.apache.droids.exception.ContentFormatViolationException;
+import org.apache.droids.exception.DroidsException;
+import org.apache.droids.exception.InvalidLinkException;
 import org.apache.droids.helper.Loggable;
 import org.apache.droids.LinkTask;
 import org.apache.droids.ParseData;
@@ -39,6 +42,7 @@
 import org.w3c.dom.NamedNodeMap;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
 import org.xml.sax.SAXNotRecognizedException;
 import org.xml.sax.SAXNotSupportedException;
 
@@ -61,17 +65,13 @@
     this.elements = elements;
   }
 
-  private URL base = null;
+  private URI base = null;
 
   private Link link = null;
 
-  public Parse getParse(InputStream stream, Link newLink) {
+  public Parse getParse(InputStream stream, Link newLink) throws DroidsException, IOException {
     this.link = newLink;
-    try {
-      this.base = new URL(newLink.getId());
-    } catch (MalformedURLException e1) {
-      log.fatal(e1);
-    }
+    this.base = newLink.getURI();
     ParseData parseData = null;
     // setup filter chain
     final XMLDocumentFilter[] filters = { getRemover() };
@@ -81,21 +81,20 @@
     // parse document
     // XMLInputSource source = new XMLInputSource(null, uri, uri);
     try {
-      parser.parse(base.toExternalForm(), node);
-      parseData = extract(node);
-    } catch (Exception e) {
-      log.fatal(e);
-      return new ParseImpl(stream.toString(), null);
+      parser.parse(base.toString(), node);
+    } catch (SAXException ex) {
+      throw new ContentFormatViolationException("Failure parsing HTML content", ex);
     }
+    parseData = extract(node);
     return new ParseImpl(stream.toString(), parseData);
   }
 
-  private ParseData extract(DocumentFragment node) {
+  private ParseData extract(DocumentFragment node) throws InvalidLinkException {
     final ArrayList<Link> links = new ArrayList<Link>();
     try {
       extractLinks(node, links, new HashSet<String>());
-    } catch (MalformedURLException e) {
-      log.fatal(e);
+    } catch (URISyntaxException ex) {
+      throw new InvalidLinkException(ex.getMessage(), ex);
     }
     return new ParseData(links);
   }
@@ -113,10 +112,10 @@
       parser.setFeature(
         "http://cyberneko.org/html/features/report-errors",
         false);
-    } catch (SAXNotRecognizedException e) {
-      log.fatal(e);
-    } catch (SAXNotSupportedException e) {
-      log.fatal(e);
+    } catch (SAXNotRecognizedException ex) {
+      throw new IllegalStateException(ex);
+    } catch (SAXNotSupportedException ex) {
+      throw new IllegalStateException(ex);
     }
     return parser;
   }
@@ -135,7 +134,7 @@
   }
 
   private void extractLinks(Node node, ArrayList<Link> links,
-      HashSet<String> set) throws MalformedURLException {
+      HashSet<String> set) throws URISyntaxException {
     if (node.getNodeType() == Node.ELEMENT_NODE) {
       String nodeName = node.getNodeName().toLowerCase();
       if (elements.containsKey(nodeName)) {
@@ -147,32 +146,28 @@
           String attrName = attr.getNodeName();
           if (attrName.equalsIgnoreCase(value)) {
             target = attr.getNodeValue();
-            try {
-              String newUrl = "";
-              if(target.startsWith("/")){
-                newUrl=base.getProtocol()+"://"+base.getHost();
-                if(base.getPort()>-1){
-                  newUrl+=":"+base.getPort();
-                }
-                newUrl += target;
-              }else if(!target.toLowerCase().startsWith("javascript")){
-                newUrl=new URL(base, target).toString();
+            String newUrl = "";
+            if(target.startsWith("/")){
+              newUrl=base.getScheme() + "://"+base.getHost();
+              if(base.getPort()>-1){
+                newUrl+=":"+base.getPort();
               }
-              if (!newUrl.equals("")) {
-                // Link from, URI uri, int depth, String text
-                String url = target.contains(":/") ? target : newUrl;
-                URI uri = new URI( url );
-                final LinkTask outlink = new LinkTask( link, uri, link.getDepth()+1 );
-                log.debug("set size: "+set.size());
-                log.debug("outlink.getToUrl(): "+outlink.getURI());
-                log.debug("set.contains(outlink.getToUrl(): "+set.contains(url));
-                if (!set.contains(url)) {
-                  set.add(url);
-                  links.add(outlink);
-                }
+              newUrl += target;
+            }else if(!target.toLowerCase().startsWith("javascript")){
+              newUrl = base.resolve(target).toString();
+            }
+            if (!newUrl.equals("")) {
+              // Link from, URI uri, int depth, String text
+              String url = target.contains(":/") ? target : newUrl;
+              URI uri = new URI( url );
+              final LinkTask outlink = new LinkTask( link, uri, link.getDepth()+1 );
+              log.debug("set size: "+set.size());
+              log.debug("outlink.getToUrl(): "+outlink.getURI());
+              log.debug("set.contains(outlink.getToUrl(): "+set.contains(url));
+              if (!set.contains(url)) {
+                set.add(url);
+                links.add(outlink);
               }
-            } catch (Exception e) {
-              log.fatal(e);
             }
           }
         }

Modified: incubator/droids/trunk/src/java/org/apache/droids/robot/crawler/CrawlingWorker.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/src/java/org/apache/droids/robot/crawler/CrawlingWorker.java?rev=711854&r1=711853&r2=711854&view=diff
==============================================================================
--- incubator/droids/trunk/src/java/org/apache/droids/robot/crawler/CrawlingWorker.java (original)
+++ incubator/droids/trunk/src/java/org/apache/droids/robot/crawler/CrawlingWorker.java Thu Nov  6 05:30:18 2008
@@ -20,7 +20,7 @@
 import java.io.InputStream;
 import java.net.URL;
 import java.util.Collection;
-import java.util.HashMap;
+import java.util.LinkedHashMap;
 import java.util.Map;
 
 import org.apache.droids.api.Link;
@@ -105,7 +105,7 @@
     URLFiltersFactory filters = droid.getFiltersFactory();
    
     // TODO -- make the hashvalue for Outlink...
-    Map<String,Link> filtered = new HashMap<String,Link>();
+    Map<String,Link> filtered = new LinkedHashMap<String,Link>();
     for( Link outlink : parse.getData().getOutlinks() ) {
       String id = outlink.getId();
       if (filters.accept(outlink.getId()) && !filtered.containsKey(id)) {

Added: incubator/droids/trunk/src/test/java/org/apache/droids/DroidsFactory.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/src/test/java/org/apache/droids/DroidsFactory.java?rev=711854&view=auto
==============================================================================
--- incubator/droids/trunk/src/test/java/org/apache/droids/DroidsFactory.java (added)
+++ incubator/droids/trunk/src/test/java/org/apache/droids/DroidsFactory.java Thu Nov  6 05:30:18 2008
@@ -0,0 +1,97 @@
+package org.apache.droids;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+
+import org.apache.droids.api.Droid;
+import org.apache.droids.api.Handler;
+import org.apache.droids.api.Link;
+import org.apache.droids.api.URLFilter;
+import org.apache.droids.delay.SimpleDelayTimer;
+import org.apache.droids.helper.factories.HandlerFactory;
+import org.apache.droids.helper.factories.ParserFactory;
+import org.apache.droids.helper.factories.ProtocolFactory;
+import org.apache.droids.helper.factories.URLFiltersFactory;
+import org.apache.droids.impl.SequentialTaskMaster;
+import org.apache.droids.impl.SimpleTaskQueue;
+import org.apache.droids.parse.html.HtmlParser;
+import org.apache.droids.protocol.http.Http;
+import org.apache.droids.robot.crawler.CrawlingDroid;
+
+public class DroidsFactory // static helpers that assemble Droids components for the test cases
+{
+  /** Builds a ParserFactory and puts an HtmlParser (a/link href, img/script src) under "text/html" in its map. */
+  public static ParserFactory createDefaultParserFactory() {
+    ParserFactory parserFactory = new ParserFactory();
+    HtmlParser htmlParser = new HtmlParser();
+    htmlParser.setElements(new HashMap<String, String>());
+    htmlParser.getElements().put("a", "href");
+    htmlParser.getElements().put("link", "href");
+    htmlParser.getElements().put("img", "src");
+    htmlParser.getElements().put("script", "src");
+    parserFactory.setMap(new HashMap<String, Object>());
+    parserFactory.getMap().put("text/html", htmlParser);
+    return parserFactory;
+  }
+  /** Builds a ProtocolFactory and puts an Http protocol (user agent "Droids/1.1", forceAllow set) under "http". */
+  public static ProtocolFactory createDefaultProtocolFactory() {
+    ProtocolFactory protocolFactory = new ProtocolFactory();
+    Http httpProtocol = new Http();
+    httpProtocol.setForceAllow(true);
+    httpProtocol.setUserAgent("Droids/1.1");
+    
+    protocolFactory.setMap(new HashMap<String, Object>());
+    protocolFactory.getMap().put("http", httpProtocol);
+    return protocolFactory; 
+  }
+  /** Builds a URLFiltersFactory whose "default" filter returns every URL string unchanged. */
+  public static URLFiltersFactory createDefaultURLFiltersFactory() {
+    URLFiltersFactory filtersFactory = new URLFiltersFactory();
+    URLFilter defaultURLFilter = new URLFilter() {
+
+      public String filter(String urlString) {
+        return urlString;
+      }
+      
+    };
+    filtersFactory.setMap(new HashMap<String, Object>());
+    filtersFactory.getMap().put("default", defaultURLFilter);
+    return filtersFactory;
+  }
+  /** Builds a HandlerFactory and puts the given handler under "default" in its map. */
+  public static HandlerFactory createDefaultHandlerFactory(Handler defaultHandler) {
+    HandlerFactory handlerFactory = new HandlerFactory();
+    handlerFactory.setMap(new HashMap<String, Object>());
+    handlerFactory.getMap().put("default", defaultHandler);
+    return handlerFactory; 
+  }
+  /** Assembles a CrawlingDroid seeded with targetURI and run by a SequentialTaskMaster with a 100 ms delay timer. */
+  public static Droid<Link> createSimpleCrawlingDroid(
+      String targetURI, Handler testHandler) {
+    ParserFactory parserFactory = createDefaultParserFactory();
+    ProtocolFactory protocolFactory = createDefaultProtocolFactory();
+    URLFiltersFactory filtersFactory = createDefaultURLFiltersFactory();
+    HandlerFactory handlerFactory = createDefaultHandlerFactory(testHandler);
+
+    SimpleDelayTimer simpleDelayTimer = new SimpleDelayTimer();
+    simpleDelayTimer.setDelayMillis(100);
+    
+    SimpleTaskQueue<Link> simpleQueue = new SimpleTaskQueue<Link>();
+
+    SequentialTaskMaster<Link> taskMaster = new SequentialTaskMaster<Link>();
+    taskMaster.setDelayTimer( simpleDelayTimer );
+    
+    CrawlingDroid crawler = new CrawlingDroid( simpleQueue, taskMaster );
+    crawler.setFiltersFactory(filtersFactory);
+    crawler.setParserFactory(parserFactory);
+    crawler.setProtocolFactory(protocolFactory);
+    crawler.setHandlerFactory(handlerFactory);
+    
+    Collection<String> initialLocations = new ArrayList<String>(); // single seed location
+    initialLocations.add( targetURI );
+    crawler.setInitialLocations(initialLocations);
+    return crawler;
+  }
+  
+}

Modified: incubator/droids/trunk/src/test/java/org/apache/droids/impl/TestSimpleDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/src/test/java/org/apache/droids/impl/TestSimpleDroid.java?rev=711854&r1=711853&r2=711854&view=diff
==============================================================================
--- incubator/droids/trunk/src/test/java/org/apache/droids/impl/TestSimpleDroid.java (original)
+++ incubator/droids/trunk/src/test/java/org/apache/droids/impl/TestSimpleDroid.java Thu Nov  6 05:30:18 2008
@@ -20,29 +20,23 @@
 import java.io.InputStream;
 import java.net.URL;
 import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
 import junit.framework.Assert;
 
+import org.apache.droids.DroidsFactory;
+import org.apache.droids.api.Droid;
+import org.apache.droids.api.Handler;
 import org.apache.droids.api.Link;
 import org.apache.droids.api.Parse;
-import org.apache.droids.api.Handler;
-import org.apache.droids.delay.SimpleDelayTimer;
-import org.apache.droids.helper.factories.DroidFactory;
-import org.apache.droids.helper.factories.HandlerFactory;
-import org.apache.droids.helper.factories.ParserFactory;
-import org.apache.droids.helper.factories.ProtocolFactory;
-import org.apache.droids.helper.factories.URLFiltersFactory;
+import org.apache.droids.api.TaskExceptionHandler;
+import org.apache.droids.api.TaskExceptionResult;
+import org.apache.droids.api.TaskMaster;
 import org.apache.droids.localserver.LocalHttpServer;
 import org.apache.droids.localserver.ResourceHandler;
-import org.apache.droids.net.RegexURLFilter;
-import org.apache.droids.parse.html.HtmlParser;
-import org.apache.droids.protocol.http.Http;
-import org.apache.droids.robot.crawler.CrawlingDroid;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -71,77 +65,81 @@
     String baseURI = "http:/" + this.testserver.getServiceAddress();     
     String targetURI = baseURI + "/start_html";     
     
-    final Set<URL> visitedLinks = new HashSet<URL>();
-    
-    ParserFactory parserFactory = new ParserFactory();
-    HtmlParser htmlParser = new HtmlParser();
-    htmlParser.setElements(new HashMap<String, String>());
-    htmlParser.getElements().put("a", "href");
-    htmlParser.getElements().put("link", "href");
-    htmlParser.getElements().put("img", "src");
-    htmlParser.getElements().put("script", "src");
-    parserFactory.setMap(new HashMap<String, Object>());
-    parserFactory.getMap().put("text/html", htmlParser);
-
-    ProtocolFactory protocolFactory = new ProtocolFactory();
-    Http httpProtocol = new Http();
-    httpProtocol.setForceAllow(true);
-    httpProtocol.setUserAgent("Droids/1.1");
-    
-    protocolFactory.setMap(new HashMap<String, Object>());
-    protocolFactory.getMap().put("http", httpProtocol);
-    
-    URLFiltersFactory filtersFactory = new URLFiltersFactory();
-    RegexURLFilter defaultURLFilter = new RegexURLFilter();
-    defaultURLFilter.setFile("classpath:/regex-urlfilter.txt");
-    filtersFactory.setMap(new HashMap<String, Object>());
-    filtersFactory.getMap().put("default", defaultURLFilter);
-    
-    HandlerFactory handlerFactory = new HandlerFactory();
-    Handler defaultHandler = new Handler() {
+    final List<URL> visitedLinks = new ArrayList<URL>();
+
+    Handler testHandler = new Handler() {
 
       public void handle(InputStream openStream, URL url, Parse parse) {
         visitedLinks.add(url); 
       }
-      
+  
     };
-    handlerFactory.setMap(new HashMap<String, Object>());
-    handlerFactory.getMap().put("default", defaultHandler);
     
-    DroidFactory<Link> droidFactory = new DroidFactory<Link>();
-    droidFactory.setMap(new HashMap<String, Object>());
+    Droid<Link> droid = DroidsFactory.createSimpleCrawlingDroid(
+        targetURI, 
+        testHandler);    
+    
+    droid.init();
+    droid.start();
+    droid.getTaskMaster().awaitTermination(30, TimeUnit.SECONDS);
     
-    SimpleDelayTimer simpleDelayTimer = new SimpleDelayTimer();
-    simpleDelayTimer.setDelayMillis(100);
-    
-    SimpleTaskQueue<Link> simpleQueue = new SimpleTaskQueue<Link>();
-
-    MultiThreadedTaskMaster<Link> taskMaster = new MultiThreadedTaskMaster<Link>();
-    taskMaster.setMaxThreads( 1 );
-    taskMaster.setDelayTimer( simpleDelayTimer );
+    Assert.assertFalse(visitedLinks.isEmpty());
+    Assert.assertEquals(5, visitedLinks.size());
+    Assert.assertEquals(new URL(baseURI + "/start_html"), visitedLinks.get(0));
+    Assert.assertEquals(new URL(baseURI + "/page1_html"), visitedLinks.get(1));
+    Assert.assertEquals(new URL(baseURI + "/page2_html"), visitedLinks.get(2));
+    Assert.assertEquals(new URL(baseURI + "/page3_html"), visitedLinks.get(3));
+    Assert.assertEquals(new URL(baseURI + "/page4_html"), visitedLinks.get(4));
+  }
+  
+  @Test
+  public void testTerminateCrawlingOnException() throws Exception
+  {
+    this.testserver.register("*", new ResourceHandler());
+    this.testserver.start();
     
-    CrawlingDroid helloCrawler = new CrawlingDroid( simpleQueue, taskMaster );
-    helloCrawler.setFiltersFactory(filtersFactory);
-    helloCrawler.setParserFactory(parserFactory);
-    helloCrawler.setProtocolFactory(protocolFactory);
-    helloCrawler.setHandlerFactory(handlerFactory);
+    String baseURI = "http:/" + this.testserver.getServiceAddress();     
+    String targetURI = baseURI + "/start_html";     
     
-    Collection<String> initialLocations = new ArrayList<String>();
-    initialLocations.add( targetURI );
-    helloCrawler.setInitialLocations(initialLocations);
+    final Set<URL> visitedLinks = new HashSet<URL>();
+
+    Handler testHandler = new Handler() {
+
+      public void handle(InputStream openStream, URL url, Parse parse) {
+        visitedLinks.add(url);
+        if (url.getPath().equals("/page3_html")) {
+          throw new RuntimeException("Oppsie!!!");
+        }
+      }
+  
+    };
     
-    helloCrawler.init();
-    helloCrawler.start();
+    Droid<Link> droid = DroidsFactory.createSimpleCrawlingDroid(
+        targetURI, 
+        testHandler);    
+    
+    SequentialTaskMaster<Link> taskMaster = (SequentialTaskMaster<Link>) droid.getTaskMaster();
+    taskMaster.setExceptionHandler(new TaskExceptionHandler() {
+
+      public TaskExceptionResult handleException(Exception ex) {
+        if (ex instanceof RuntimeException) {
+          return TaskExceptionResult.FATAL;
+        }
+        return TaskExceptionResult.WARN;
+      }
+      
+    });
     
-    helloCrawler.getTaskMaster().awaitTermination(30, TimeUnit.SECONDS);
+    droid.init();
+    droid.start();
+    droid.getTaskMaster().awaitTermination(30, TimeUnit.SECONDS);
     
     Assert.assertFalse(visitedLinks.isEmpty());
-    Assert.assertEquals(5, visitedLinks.size());
+    Assert.assertEquals(4, visitedLinks.size());
     Assert.assertTrue(visitedLinks.contains(new URL(baseURI + "/start_html")));
     Assert.assertTrue(visitedLinks.contains(new URL(baseURI + "/page1_html")));
     Assert.assertTrue(visitedLinks.contains(new URL(baseURI + "/page2_html")));
     Assert.assertTrue(visitedLinks.contains(new URL(baseURI + "/page3_html")));
-    Assert.assertTrue(visitedLinks.contains(new URL(baseURI + "/page4_html")));
   }
-  
+
 }