You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by to...@apache.org on 2012/12/07 17:13:15 UTC
svn commit: r1418399 [1/2] - in /incubator/droids/branches/0.2.x-cleanup: ./
droids-core/src/main/java/org/apache/droids/core/
droids-core/src/main/java/org/apache/droids/filter/
droids-core/src/main/java/org/apache/droids/handle/ droids-core/src/main/...
Author: tobr
Date: Fri Dec 7 17:12:35 2012
New Revision: 1418399
URL: http://svn.apache.org/viewvc?rev=1418399&view=rev
Log:
hide factory calls from API
simplified API
Added:
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/ContentEntity.java (with props)
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Parser.java
- copied, changed from r1417006, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/Parser.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/FilterFactory.java
- copied, changed from r1417023, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/URLFiltersFactory.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/util/
- copied from r1417006, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/util/
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java (contents, props changed)
- copied, changed from r1406628, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingDroid.java
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingWorker.java (contents, props changed)
- copied, changed from r1406628, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/Link.java (contents, props changed)
- copied, changed from r1406628, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/api/Link.java
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/LinkTask.java (contents, props changed)
- copied, changed from r1406628, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/LinkTask.java
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/ReportCrawlingDroid.java (contents, props changed)
- copied, changed from r1406628, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/robot/crawler/ReportCrawlingDroid.java
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/
- copied from r1406628, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/protocol/http/
incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/localserver/
- copied from r1417006, incubator/droids/branches/0.2.x-cleanup/droids-core/src/test/java/org/apache/droids/localserver/
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/protocol/
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/protocol/file/
- copied from r1417006, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/protocol/file/
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/
- copied from r1417006, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/robot/walker/
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/java/org/apache/droids/walker/
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/java/org/apache/droids/walker/WalkingDroidTest.java (with props)
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/L2R_Cloud/
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/L2R_Cloud/aceu-2012-apache-cloudStack-scalability.odp (with props)
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/L2R_Cloud/aceu-2012-building-cross-platform-hybrid-applications-using-AMQP-1_0-with-apache-qpid.pdf (with props)
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/L2R_Cloud/aceu-2012-building-cross-platform-hybrid-applications-using-AMQP-1_0-with-apache-qpid.pptx (with props)
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/L2R_Cloud/aceu-2012-getting-started-with-AMQP-1_0-using-apache-qpid.pdf (with props)
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/L2R_Cloud/aceu-2012-integration-in-the-cloud-IPaaS-with-fuse-technology.pdf (with props)
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/L2R_Cloud/aceu-2012-introduction-to-apache-cloudstack.odp (with props)
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/L2R_Cloud/aceu-2012-what-is-new-in-cloudstack-4.0.odp (with props)
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/PR-Lucene/
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/PR-Lucene/aceu-2012-elastic-search-in-production_lessons-learned.pdf (with props)
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/PR-Lucene/aceu-2012-fundamentals-of-information-retrieval-illustration-with-apache-lucene.pptx (with props)
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/PR-Lucene/aceu-2012-lucene-4-performance-tuning.pdf (with props)
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/PR-Lucene/aceu-2012-query-parsing-tips-and-tricks.pdf (with props)
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/PR-Lucene/aceu-2012-searching-relational-like-data-with-lucene.pdf (with props)
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/PR-Lucene/aceu-2012-solr-4-the-NoSQL-database.pdf (with props)
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/PR-Lucene/aceu-2012-solr-4-the-NoSQL-database.pptx (with props)
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/PR-Lucene/aceu-2012-solrcloud-round-table.pptx (with props)
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/resources/docs/aceu-2012-apachecon-europe-keynote.pdf (with props)
Removed:
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/DroidFactory.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/GenericFactory.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/URLFiltersFactory.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/protocol/
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/robot/
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/App.java
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/App.java
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/test/java/org/apache/droids/AppTest.java
Modified:
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Handler.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/MultiThreadedTaskMaster.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Protocol.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Task.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Worker.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/AlreadyVisitedFilter.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/ChainTaskFilter.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/Filter.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/HostFilter.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/MaxDepthTaskFilter.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/RegexURLFilter.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/handle/SaveHandler.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/handle/SysoutHandler.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/HandlerFactory.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/ParserFactory.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/ProtocolFactory.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/test/java/org/apache/droids/core/SimpleTask.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/test/java/org/apache/droids/core/TestSimpleQueue.java
incubator/droids/branches/0.2.x-cleanup/droids-crawler/pom.xml
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/AdvancedHttpContentEntity.java
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/AdvancedHttpProtocol.java
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/HttpContentEntity.java
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/HttpProtocol.java
incubator/droids/branches/0.2.x-cleanup/droids-spring/src/main/resources/org/apache/droids/dynamic/droids-core-context.xml
incubator/droids/branches/0.2.x-cleanup/droids-spring/src/main/resources/org/apache/droids/dynamic/droids-core-factories-context.xml
incubator/droids/branches/0.2.x-cleanup/droids-spring/src/test/resources/droids-core-test-context.xml
incubator/droids/branches/0.2.x-cleanup/droids-walker/pom.xml
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/protocol/file/FileProtocol.java
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/FileTask.java
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/FileWorker.java
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/SimpleWalkingDroid.java
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/WalkingDroid.java
incubator/droids/branches/0.2.x-cleanup/pom.xml
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java Fri Dec 7 17:12:35 2012
@@ -16,42 +16,94 @@
*/
package org.apache.droids.core;
+import org.apache.droids.helper.factories.FilterFactory;
+import org.apache.droids.helper.factories.HandlerFactory;
+import org.apache.droids.helper.factories.ParserFactory;
+import org.apache.droids.helper.factories.ProtocolFactory;
+import org.apache.droids.filter.Filter;
+
+import java.io.IOException;
import java.util.Queue;
import java.util.concurrent.TimeUnit;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
/**
* Manage common tasks in standard Droids
*/
public abstract class AbstractDroid<T extends Task> implements Droid<T> {
+ protected final Queue<T> queue;
+ protected final TaskMaster<T> taskMaster;
+ protected ProtocolFactory protocolFactory;
+ protected ParserFactory parserFactory;
+ protected FilterFactory<T> filterFactory;
+ protected HandlerFactory handlerFactory;
+
+ public AbstractDroid() {
+ this.queue = new SimpleTaskQueueWithHistory<T>();
+ this.taskMaster = new MultiThreadedTaskMaster<T>();
+ this.protocolFactory = new ProtocolFactory();
+ this.parserFactory = new ParserFactory();
+ this.filterFactory = new FilterFactory();
+ this.handlerFactory = new HandlerFactory();
+ }
+
+ public AbstractDroid(Queue<T> queue, TaskMaster<T> taskMaster) {
+ this.queue = queue;
+ this.taskMaster = taskMaster;
+ }
+
+ @Override
+ public void start() {
+ taskMaster.start(queue, this);
+ try {
+ taskMaster.awaitTermination(1000, TimeUnit.MILLISECONDS);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+
+ @Override
+ public void add(T task) {
+ queue.add(task);
+ }
+
+ @Override
+ public void parse(T task) throws DroidsException, IOException {
+ this.parserFactory.parse(task);
+ }
+
+ @Override
+ public void handle(T task) throws DroidsException, IOException {
+ this.handlerFactory.handle(task);
+ }
+
+ @Override
+ public T filter(T task) {
+ return this.filterFactory.filter(task);
+ }
+
+ public void addParsers(Parser... parsers) {
+ for (Parser parser : parsers) {
+ this.parserFactory.addParser(parser);
+ }
+ }
+
+ public void addProtocols(Protocol... protocols) {
+ for (Protocol protocol : protocols) {
+ this.protocolFactory.addProtocol(protocol);
+ }
+ }
+
+ public void addHandlers(Handler... handlers) {
+ for (Handler handler : handlers) {
+ this.handlerFactory.addHandler(handler);
+ }
+ }
+
+ public void addFilters(Filter<T>... filters) {
+ for (Filter<T> filter : filters) {
+ this.filterFactory.addFilter(filter);
+ }
+ }
+
- protected final Logger log = LoggerFactory.getLogger(AbstractDroid.class);
- protected final Queue<T> queue;
- protected final TaskMaster<T> taskMaster;
-
- public AbstractDroid(Queue<T> queue, TaskMaster<T> taskMaster) {
- this.queue = queue;
- this.taskMaster = taskMaster;
- }
-
- @Override
- public void start() {
- taskMaster.start(queue, this);
- try {
- taskMaster.awaitTermination(1000, TimeUnit.MILLISECONDS);
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- public Queue<T> getQueue() {
- return queue;
- }
-
- @Override
- public TaskMaster<T> getTaskMaster() {
- return taskMaster;
- }
}
Added: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/ContentEntity.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/ContentEntity.java?rev=1418399&view=auto
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/ContentEntity.java (added)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/ContentEntity.java Fri Dec 7 17:12:35 2012
@@ -0,0 +1,31 @@
+package org.apache.droids.core;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ *
+ *
+ *
+ */
+public class ContentEntity {
+ private Map<String, Object> data;
+
+ public final static String CONTENT = "content";
+ public final static String MIME_TYPE = "mime";
+ public final static String CONTENT_LENGTH = "content-length";
+
+ public ContentEntity() {
+ this.data = new HashMap<String, Object>();
+ }
+
+ public Object getValue(String key) {
+ return data.get(key);
+ }
+
+ public void put(String key, Object value) {
+ this.data.put(key, value);
+ }
+
+
+}
Propchange: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/ContentEntity.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/ContentEntity.java
------------------------------------------------------------------------------
svn:keywords = Author Date Id Revision
Propchange: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/ContentEntity.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java Fri Dec 7 17:12:35 2012
@@ -16,57 +16,61 @@
*/
package org.apache.droids.core;
-import java.util.Queue;
+import java.io.IOException;
/**
* Interface for a droid. Droid can be seen as a "project manager" that delegates
* the work to {@link Worker} units.
- * <p>
+ * <p/>
* Droids aims to be an intelligent standalone robot framework that allows to
* create and extend existing droids (robots). In the future it will offer an
- * administration application to manage and controll the different droids.
- *
+ * administration application to manage and control the different droids.
+ *
* @version 1.0
*/
public interface Droid<T extends Task> {
-
- /**
- * Initialize the queue. Can have different implementation but the main
- * groups normally are
- * <ol>
- * <li>add only one url, from which we then start crawling</li>
- * <li>add an array of start urls and then crawl them</li>
- * <li>add an array of urls as fixed subset (no further crawling done)</li>
- * </ol>
- *
- * @throws DroidsException
- */
- void init() throws DroidsException;
-
- /**
- * Invoke an instance of the worker used in the droid
- */
- void start();
-
- /**
- * Invoke when the droid has completed
- */
- void finished();
-
- /**
- * Return the tasks queue
- *
- * @return
- */
- public Queue<T> getQueue();
-
- /**
- * Ask the droid for a new worker
- */
- Worker<T> getNewWorker();
-
- /**
- * Get the task master
- */
- TaskMaster<T> getTaskMaster();
+
+ /**
+ * Invoke an instance of the worker used in the droid
+ */
+ public void start();
+
+ /**
+ * Invoke when the droid has completed
+ */
+ public void finished();
+
+ /**
+ * Add new Tasks to the queue
+ *
+ * @param task the new task
+ */
+ public void add(T task);
+
+ /**
+ * Ask the droid for a new worker
+ */
+ public Worker<T> getNewWorker();
+
+ /**
+ * Parse the task
+ *
+ * @param task the task
+ */
+ public void parse(T task) throws DroidsException, IOException;
+
+ /**
+ * Handle the task.
+ *
+ * @param task
+ */
+ public void handle(T task) throws DroidsException, IOException;
+
+ /**
+ * Filter the task.
+ *
+ * @param task
+ * @return the task or null, if the task is not valid
+ */
+ public T filter(T task);
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Handler.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Handler.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Handler.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Handler.java Fri Dec 7 17:12:35 2012
@@ -35,5 +35,5 @@ public interface Handler {
* @throws Exception
*/
void handle(Task task)
- throws IOException, DroidsException;
+ throws DroidsException, IOException;
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/MultiThreadedTaskMaster.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/MultiThreadedTaskMaster.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/MultiThreadedTaskMaster.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/MultiThreadedTaskMaster.java Fri Dec 7 17:12:35 2012
@@ -101,7 +101,7 @@ public class MultiThreadedTaskMaster<T e
} catch(InterruptedException ignored) {
LOG.error("", ignored);
}
- pool.execute(new TaskExecutor(droid));
+ pool.execute(new TaskExecutor(queue, droid));
}
}
@@ -303,10 +303,10 @@ public class MultiThreadedTaskMaster<T e
private final Queue<T> queue;
private final Worker<T> worker;
- public TaskExecutor(Droid<T> droid)
+ public TaskExecutor(Queue queue, Droid<T> droid)
{
this.droid = droid;
- this.queue = droid.getQueue();
+ this.queue = queue;
this.worker = droid.getNewWorker();
}
Copied: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Parser.java (from r1417006, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/Parser.java)
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Parser.java?p2=incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Parser.java&p1=incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/Parser.java&r1=1417006&r2=1418399&rev=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/Parser.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Parser.java Fri Dec 7 17:12:35 2012
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.droids.parse;
+package org.apache.droids.core;
import java.io.IOException;
@@ -24,7 +24,6 @@ import org.apache.droids.core.Task;
/**
* Simple parser that is only forcing to return a parse object.
*
- * @see Parse
* @version 1.0
*
*/
@@ -32,11 +31,9 @@ public interface Parser {
/**
* Creates the parse for some content.
*
- * @param entity
- * the underlying stream we are using
* @param task
* the task that corresponds to the stream
* @return the parse object
*/
- Parse parse(Task task) throws DroidsException, IOException;
+ public void parse(Task task) throws DroidsException, IOException;
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Protocol.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Protocol.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Protocol.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Protocol.java Fri Dec 7 17:12:35 2012
@@ -23,30 +23,28 @@ import java.net.URI;
/**
* The protocol interface is a wrapper to hide the underlying implementation of
* the communication at protocol level.
- *
+ *
* @version 1.0
- *
*/
public interface Protocol {
- /**
- * Some protocols (like http) offer a mechanism to evaluate whether the client
- * can request a given url (in http this is the robots.txt configuration)
- *
- * @param url
- * the url to evaluate
- * @return true if we can request the url. false if we are forbidden.
- * @throws MalformedURLException
- */
- boolean isAllowed(URI url) throws IOException;
+ /**
+ * Some protocols (like http) offer a mechanism to evaluate whether the client
+ * can request a given url (in http this is the robots.txt configuration)
+ *
+ * @param url the url to evaluate
+ * @return true if we can request the url. false if we are forbidden.
+ * @throws MalformedURLException
+ */
+ boolean isAllowed(URI url) throws IOException;
- /**
- * Return the content entity represent of the url
- *
- * @param url
- * url of the stream we want to open
- * @return the content of the given url
- * @throws IOException
- */
- Task load(URI uri) throws IOException;
+ /**
+ * Return the content entity that represents the given uri
+ *
+ * @param uri uri of the stream we want to open
+ * @return the content of the given url
+ * @throws IOException
+ */
+ Task load(URI uri) throws IOException;
+ String scheme();
}
\ No newline at end of file
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Task.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Task.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Task.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Task.java Fri Dec 7 17:12:35 2012
@@ -16,8 +16,6 @@
*/
package org.apache.droids.core;
-import java.io.IOException;
-import java.io.InputStream;
import java.io.Serializable;
import java.net.URI;
import java.util.Date;
@@ -27,42 +25,38 @@ import java.util.Date;
* task. That is based on the fact that a droid can extract more tasks from the
* one that it is currently working on. However sometimes one want to limit the
* number of nested task, this is what is determined by the depth.
- *
+ *
* @version 1.0
- *
*/
public interface Task extends Serializable {
- /**
- * The id of the task. In a standard crawl that is most likely the url that
- * identifies the task
- *
- * @return The id of the task
- */
- public URI getURI();
-
-
- /**
- * The content of the task.
- *
- * @return the content of the task
- * @throws IOException
- */
- public InputStream getContent() throws IOException;;
-
- /**
- *
- * @return The depth of the task
- */
- int getDepth();
-
- /**
- * When was the task created
- *
- * @return the date when the task was created.
- */
- Date getTaskDate();
-
- public void abort();
-
- public boolean isAborted();
+ /**
+ * The id of the task. In a standard crawl that is most likely the url that
+ * identifies the task
+ *
+ * @return The id of the task
+ */
+ public URI getURI();
+
+ /**
+ * The data of the task.
+ *
+ * @return a Map of data values
+ */
+ public ContentEntity getContentEntity();
+
+ /**
+ * @return The depth of the task
+ */
+ public int getDepth();
+
+ /**
+ * When was the task created
+ *
+ * @return the date when the task was created.
+ */
+ public Date getTaskDate();
+
+ public void abort();
+
+ public boolean isAborted();
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Worker.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Worker.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Worker.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Worker.java Fri Dec 7 17:12:35 2012
@@ -19,25 +19,22 @@ package org.apache.droids.core;
import java.io.IOException;
-
-
/**
* A worker is the unit that is doing the actual work. A {@link Droid} is the
* "project manager" that delegates the work to worker units. Worker units are
* implemented as threads to scale their number if there is more work to do.
*
* @version 1.0
- *
*/
public interface Worker<T extends Task> {
- /**
- * Executes a task.
- *
- * @param task
- * @throws DroidsException
- * @throws IOException
- */
- void execute( final T task ) throws DroidsException, IOException;
+ /**
+ * Executes a task.
+ *
+ * @param task
+ * @throws DroidsException
+ * @throws IOException
+ */
+ public void execute(final T task) throws DroidsException, IOException;
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/AlreadyVisitedFilter.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/AlreadyVisitedFilter.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/AlreadyVisitedFilter.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/AlreadyVisitedFilter.java Fri Dec 7 17:12:35 2012
@@ -37,7 +37,7 @@ public class AlreadyVisitedFilter<T exte
}
@Override
- public Task filter(final T task) {
+ public T filter(final T task) {
try {
URI uri = task.getURI();
URI key = new URI(uri.getScheme(), null, uri.getHost(), uri.getPort(), uri.getPath(), null, null);
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/ChainTaskFilter.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/ChainTaskFilter.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/ChainTaskFilter.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/ChainTaskFilter.java Fri Dec 7 17:12:35 2012
@@ -34,7 +34,7 @@ public final class ChainTaskFilter<T ext
}
@Override
- public Task filter(final T task) {
+ public T filter(final T task) {
for (final Filter<T> filter : this.filterChain) {
if (filter.filter(task) == null) {
return null;
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/Filter.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/Filter.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/Filter.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/Filter.java Fri Dec 7 17:12:35 2012
@@ -33,9 +33,9 @@ public interface Filter<T extends Task>
* Transforms the URL: can pass the original URL through or "delete" the URL
* by returning null
*
- * @param urlString
- * the url to filter
+ * @param task the task to filter
+ *
* @return null if the filter excludes the url or the url again if allowed
*/
- Task filter(T task);
+ public T filter(T task);
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/HostFilter.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/HostFilter.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/HostFilter.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/HostFilter.java Fri Dec 7 17:12:35 2012
@@ -47,7 +47,7 @@ public class HostFilter<T extends Task>
}
@Override
- public Task filter(final T task) {
+ public T filter(final T task) {
if (this.allowedHosts.contains(task.getURI().getHost())) {
return task;
} else {
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/MaxDepthTaskFilter.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/MaxDepthTaskFilter.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/MaxDepthTaskFilter.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/MaxDepthTaskFilter.java Fri Dec 7 17:12:35 2012
@@ -33,7 +33,7 @@ public class MaxDepthTaskFilter<T extend
}
@Override
- public Task filter(final T task) {
+ public T filter(final T task) {
if (maxDepth > 0 && task.getDepth() > maxDepth) {
return null;
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/RegexURLFilter.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/RegexURLFilter.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/RegexURLFilter.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/filter/RegexURLFilter.java Fri Dec 7 17:12:35 2012
@@ -67,7 +67,7 @@ public class RegexURLFilter<T extends Ta
* @see org.apache.droids.api.URLFilter#filter(java.lang.String)
*/
@Override
- public Task filter(T task) {
+ public T filter(T task) {
synchronized (rules) {
for (RegexRule rule : rules) {
if (rule.match(task.getURI().toString())) {
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/handle/SaveHandler.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/handle/SaveHandler.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/handle/SaveHandler.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/handle/SaveHandler.java Fri Dec 7 17:12:35 2012
@@ -25,8 +25,11 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
+import org.apache.droids.core.ContentEntity;
+import org.apache.droids.core.DroidsException;
import org.apache.droids.core.Handler;
import org.apache.droids.core.Task;
+import org.apache.droids.exception.InvalidTaskException;
import org.apache.droids.util.FileUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -54,20 +57,21 @@ public class SaveHandler extends WriterH
/**
* Handle saving content.
*
- * @param uri
- * the uri we are currently processing
- * @param entity
- * the entity to save
- *
- * @throws IOException
- * on error
+ * @param task the task to handle
+ *
+ * @throws IOException on file error
+ * @throws DroidsException on content entity error
*/
@Override
- public void handle(Task task) throws IOException {
+ public void handle(Task task) throws IOException, DroidsException {
this.uri = task.getURI();
- InputStream instream = task.getContent();
- String path = saveContentHandlerStrategy.calculateFilePath(uri, task);
+ InputStream instream;
+ if (task.getContentEntity().getValue(ContentEntity.CONTENT) instanceof InputStream) {
+ instream = (InputStream)task.getContentEntity().getValue(ContentEntity.CONTENT);
+ } else
+ throw new InvalidTaskException("no inputstream available");
try {
+ String path = saveContentHandlerStrategy.calculateFilePath(uri, task);
writeOutput(path, instream);
} finally {
instream.close();
@@ -108,7 +112,7 @@ public class SaveHandler extends WriterH
* on error
*/
private void writeContentToFile(InputStream stream, File cache)
- throws FileNotFoundException, IOException {
+ throws IOException {
OutputStream output = null;
final int bufferSize = 8192;
byte[] buffer = new byte[bufferSize];
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/handle/SysoutHandler.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/handle/SysoutHandler.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/handle/SysoutHandler.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/handle/SysoutHandler.java Fri Dec 7 17:12:35 2012
@@ -23,8 +23,11 @@ import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
+import org.apache.droids.core.ContentEntity;
+import org.apache.droids.core.DroidsException;
import org.apache.droids.core.Handler;
import org.apache.droids.core.Task;
+import org.apache.droids.exception.InvalidTaskException;
/**
* Handler that write the stream to the sysout. Mostly added for debugging
@@ -42,8 +45,12 @@ public class SysoutHandler extends Write
}
@Override
- public void handle(Task task) throws IOException {
- InputStream instream = task.getContent();
+ public void handle(Task task) throws IOException, DroidsException {
+ InputStream instream;
+ if (task.getContentEntity().getValue(ContentEntity.CONTENT) instanceof InputStream) {
+ instream = (InputStream)task.getContentEntity().getValue(ContentEntity.CONTENT);
+ } else
+ throw new InvalidTaskException("no inputstream available");
try {
writeOutput(instream);
} finally {
Copied: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/FilterFactory.java (from r1417023, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/URLFiltersFactory.java)
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/FilterFactory.java?p2=incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/FilterFactory.java&p1=incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/URLFiltersFactory.java&r1=1417023&r2=1418399&rev=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/URLFiltersFactory.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/FilterFactory.java Fri Dec 7 17:12:35 2012
@@ -19,60 +19,24 @@ package org.apache.droids.helper.factori
import org.apache.droids.core.Task;
import org.apache.droids.filter.Filter;
+import java.util.Set;
+
/**
* Factory that will traverse all registered filter and execute them.
- *
+ *
* @version 1.0
- *
*/
-public class URLFiltersFactory extends GenericFactory<Filter> {
-
- /**
- * Run all defined filters. Assume logical AND.
- *
- * @param task
- * the current Task
- * @return true if filter plugin accept the url, false if excluded.
- */
- public boolean accept(Task task) {
- if (task == null) {
- return false;
- }
-
- for (String key : getMap().keySet()) {
- if (!accept(task, key)) {
- return false;
- }
- }
- return true;
- }
+public class FilterFactory<T extends Task> {
+ // Registered filters, kept in registration order. Created eagerly so that
+ // addFilter() and filter() never dereference a null collection.
+ private final Set<Filter> filters = new java.util.LinkedHashSet<Filter>();
- /**
- * Run a specific filter class.
- *
- * @param task
- * the task to test
- * @param filterName
- * - name of the specific filter class.
- * @return true if filter plugin accept the url, false if excluded.
- */
- public boolean accept(Task task, String filterName) {
- if (task == null || doFilter(task, filterName) == null) {
- return false;
- }
- return true;
- }
+ /**
+ * Registers a filter that will be consulted by {@link #filter(Task)}.
+ *
+ * NOTE(review): the method-level type parameter shadows the class-level
+ * one; it is kept so the published signature does not change.
+ *
+ * @param filter the filter to register
+ */
+ public <T extends Task> void addFilter(Filter<T> filter) {
+ filters.add(filter);
+ }
- /**
- * Check string against filters list
- *
- * @param task
- * - task to test
- * @param filterName
- * - name of the specific filter class.
- * @return the URL if it's allowed, NULL otherwise
- */
- protected Task doFilter(Task task, String filterName) {
- return getMap().get(filterName).filter(task);
- }
+ /**
+ * Runs the task through every registered filter (logical AND). The result
+ * of each filter is handed to the next one; as soon as a filter returns
+ * null the task counts as rejected and null is returned.
+ *
+ * @param task the task to filter
+ * @return the task as returned by the last filter, or null if the task was
+ * null or any filter rejected it
+ */
+ @SuppressWarnings("unchecked")
+ public <T extends Task> T filter(T task) {
+ T current = task;
+ for (Filter<T> filter : filters) {
+ if (current == null) {
+ // Rejected by a previous filter (or null input): stop early.
+ return null;
+ }
+ current = filter.filter(current);
+ }
+ return current;
+ }
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/HandlerFactory.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/HandlerFactory.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/HandlerFactory.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/HandlerFactory.java Fri Dec 7 17:12:35 2012
@@ -17,6 +17,7 @@
package org.apache.droids.helper.factories;
import java.io.IOException;
+import java.util.Set;
import org.apache.droids.core.DroidsException;
import org.apache.droids.core.Handler;
@@ -28,26 +29,24 @@ import org.apache.droids.core.Task;
* @version 1.0
*
*/
-public class HandlerFactory extends GenericFactory<Handler> {
+public class HandlerFactory {
+ // Registered handlers, kept in registration order. Created eagerly so that
+ // addHandler() and handle() never dereference a null collection.
+ private final Set<Handler> handlers = new java.util.LinkedHashSet<Handler>();
+
+ /**
+ * Registers a handler that will be invoked by {@link #handle(Task)}.
+ *
+ * @param handler the handler to register
+ */
+ public void addHandler(Handler handler) {
+ handlers.add(handler);
+ }
/**
* Will traverse all registered handler and execute them. If we encounter a
* problem we directly return false and leave.
*
- * @param stream
- * the underlying stream
- * @param url
- * the underlying url
- * @param parse
- * the underlying parse object
- * @return false if we found a problem, true if all went well
+ * @param task the task to handle
+ * @throws DroidsException propagated unchanged from the first failing handler
+ * @throws IOException propagated unchanged from the first failing handler
*/
- public boolean handle(Task task)
+ public void handle(Task task)
throws DroidsException, IOException {
- for (Handler handler : getMap().values()) {
+ for (Handler handler : handlers) {
handler.handle(task);
}
- return true;
}
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/ParserFactory.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/ParserFactory.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/ParserFactory.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/ParserFactory.java Fri Dec 7 17:12:35 2012
@@ -17,30 +17,28 @@
package org.apache.droids.helper.factories;
import org.apache.droids.core.DroidsException;
-import org.apache.droids.parse.Parser;
-import org.apache.droids.parse.ParserNotFoundException;
+import org.apache.droids.core.Parser;
+import org.apache.droids.core.Task;
+
+import java.io.IOException;
+import java.util.Set;
/**
* Factory that will lookup a parser by its identifier and return it.
- *
+ *
* @version 1.0
- *
*/
-public class ParserFactory extends GenericFactory<Parser> {
+public class ParserFactory {
+ // Registered parsers, kept in registration order. Created eagerly so that
+ // addParser() and parse() never dereference a null collection.
+ private final Set<Parser> parsers = new java.util.LinkedHashSet<Parser>();
+
+ /**
+ * Registers a parser that will be invoked by {@link #parse(Task)}.
+ *
+ * @param parser the parser to register
+ */
+ public void addParser(Parser parser) {
+ this.parsers.add(parser);
+ }
- /**
- * Lookup a parser by its identifier (content type) and return it.
- *
- * @param contentType
- * for which content type we need a parser
- * @return null if we do not find a registered Parser otherwise the Parser
- * @throws DroidsException
- */
- public Parser getParser(String contentType) throws DroidsException {
- if (contentType == null) {
- throw new ParserNotFoundException(contentType);
+ /**
+ * Hands the task to every registered parser in turn. An exception thrown
+ * by a parser aborts the traversal and is propagated to the caller.
+ *
+ * @param task the task to parse
+ * @throws DroidsException propagated unchanged from a parser
+ * @throws IOException propagated unchanged from a parser
+ */
+ public void parse(Task task) throws DroidsException, IOException {
+ for (Parser parser : parsers) {
+ parser.parse(task);
+ }
}
- return getMap().get(contentType);
- }
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/ProtocolFactory.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/ProtocolFactory.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/ProtocolFactory.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/ProtocolFactory.java Fri Dec 7 17:12:35 2012
@@ -16,39 +16,42 @@
*/
package org.apache.droids.helper.factories;
-import java.net.URI;
-
import org.apache.droids.core.Protocol;
import org.apache.droids.exception.ProtocolNotFoundException;
+import java.net.URI;
+import java.util.Set;
+
/**
* Factory that will lookup a protocol plugin and return it.
- *
+ *
* @version 1.0
- *
*/
-public class ProtocolFactory extends GenericFactory<Protocol> {
+public class ProtocolFactory {
+ // Registered protocols, kept in registration order. Created eagerly so that
+ // addProtocol() and getProtocol() never dereference a null collection.
+ private final Set<Protocol> protocols = new java.util.LinkedHashSet<Protocol>();
+
+ /**
+ * Registers a protocol that can later be resolved by {@link #getProtocol(URI)}.
+ *
+ * @param protocol the protocol to register
+ */
+ public void addProtocol(Protocol protocol) {
+ protocols.add(protocol);
+ }
+
- /**
- * Will lookup a protocol based on the underlying uri
- *
- * @param uri
- * the string that contains the protocol
- * @return ready to use protocol plugin or null if non have been found
- * @throws ProtocolNotFoundException
- */
- public Protocol getProtocol(URI uri) throws ProtocolNotFoundException {
- Protocol protocol = null;
- try {
- String protocolName = uri.getScheme();
- if (protocolName == null) {
+ /**
+ * Looks up the registered protocol whose scheme matches the scheme of the
+ * given URI. The first match in registration order wins.
+ *
+ * @param uri the URI whose scheme selects the protocol
+ * @return the matching protocol; never null
+ * @throws ProtocolNotFoundException if the URI has no scheme or no
+ * registered protocol matches it
+ */
+ public Protocol getProtocol(URI uri) throws ProtocolNotFoundException {
+ String scheme = uri.getScheme();
+ if (scheme == null) {
+ throw new ProtocolNotFoundException(uri);
+ }
+ for (Protocol protocol : protocols) {
+ // URI schemes are case-insensitive; calling equalsIgnoreCase on the
+ // non-null URI scheme also tolerates a protocol reporting a null scheme.
+ if (scheme.equalsIgnoreCase(protocol.scheme())) {
+ return protocol;
+ }
+ }
throw new ProtocolNotFoundException(uri);
- }
- protocol = getMap().get(protocolName);
- } catch (ProtocolNotFoundException e) {
- throw new ProtocolNotFoundException(uri, e.toString());
}
- return protocol;
- }
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/test/java/org/apache/droids/core/SimpleTask.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/test/java/org/apache/droids/core/SimpleTask.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/test/java/org/apache/droids/core/SimpleTask.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/test/java/org/apache/droids/core/SimpleTask.java Fri Dec 7 17:12:35 2012
@@ -9,6 +9,7 @@ public class SimpleTask implements Task
private URI uri;
private int depth;
private boolean aborted;
+ private ContentEntity contentEntity;
private static final long serialVersionUID = 2506491803180939447L;
@@ -16,19 +17,20 @@ public class SimpleTask implements Task
this.uri = uri;
this.depth = depth;
this.aborted = false;
+ this.contentEntity = new ContentEntity();
}
-
- @Override
+
+ @Override
+ public ContentEntity getContentEntity() {
+ return contentEntity;
+ }
+
+ @Override
public URI getURI() {
return uri;
}
@Override
- public InputStream getContent() throws IOException {
- return null;
- }
-
- @Override
public int getDepth() {
return depth;
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/test/java/org/apache/droids/core/TestSimpleQueue.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/test/java/org/apache/droids/core/TestSimpleQueue.java?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/test/java/org/apache/droids/core/TestSimpleQueue.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/test/java/org/apache/droids/core/TestSimpleQueue.java Fri Dec 7 17:12:35 2012
@@ -16,40 +16,39 @@
*/
package org.apache.droids.core;
-import java.io.File;
+import java.net.URI;
import junit.framework.Assert;
import org.apache.droids.filter.MaxDepthTaskFilter;
-import org.apache.droids.robot.walker.FileTask;
import org.junit.Before;
import org.junit.Test;
public class TestSimpleQueue {
- MaxDepthTaskFilter<FileTask> filter;
+ MaxDepthTaskFilter<SimpleTask> filter;
@Before
public final void initialize() {
- filter = new MaxDepthTaskFilter<FileTask>();
+ filter = new MaxDepthTaskFilter<SimpleTask>();
filter.setMaxDepth(5);
}
@Test
public void whenTaskBelowMaxDepthIsValidated_thenTaskIsValid() throws Exception {
- final FileTask task = new FileTask(new File(""), 3);
+ final SimpleTask task = new SimpleTask(new URI("http://www.example.com"), 3);
Assert.assertNotNull(filter.filter(task));
}
@Test
public void whenTaskEqualToMaxDepthIsValidated_thenTaskIsValid() throws Exception {
- final FileTask task = new FileTask(new File(""), 5);
+ final SimpleTask task = new SimpleTask(new URI("http://www.example.com"), 5);
Assert.assertNotNull(filter.filter(task));
}
@Test
public void whenTaskOverMaxDepthIsValidated_thenTaskIsNotValid() throws Exception {
- final FileTask task = new FileTask(new File(""), 7);
+ final SimpleTask task = new SimpleTask(new URI("http://www.example.com"), 7);
Assert.assertNull(filter.filter(task));
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-crawler/pom.xml
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/pom.xml?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/pom.xml (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/pom.xml Fri Dec 7 17:12:35 2012
@@ -7,16 +7,15 @@
<artifactId>droids</artifactId>
<version>0.3.0-incubating-SNAPSHOT</version>
</parent>
- <groupId>org.apache.droids</groupId>
<artifactId>droids-crawler</artifactId>
- <version>0.3.0-incubating-SNAPSHOT</version>
<name>droids-crawler</name>
- <url>http://maven.apache.org</url>
- <properties>
- <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
- </properties>
<dependencies>
<dependency>
+ <groupId>org.apache.droids</groupId>
+ <artifactId>droids-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
Copied: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java (from r1406628, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingDroid.java)
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java?p2=incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java&p1=incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingDroid.java&r1=1406628&r2=1418399&rev=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java Fri Dec 7 17:12:35 2012
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.droids.robot.crawler;
+package org.apache.droids.crawler;
import java.net.URI;
import java.net.URISyntaxException;
@@ -23,103 +23,49 @@ import java.util.Collection;
import com.google.common.base.Preconditions;
import java.util.Queue;
-import org.apache.droids.AbstractDroid;
-import org.apache.droids.LinkTask;
-import org.apache.droids.api.Link;
-import org.apache.droids.api.TaskMaster;
-import org.apache.droids.api.TaskValidator;
-import org.apache.droids.api.Worker;
+import org.apache.droids.core.AbstractDroid;
+import org.apache.droids.core.TaskMaster;
+import org.apache.droids.core.Worker;
import org.apache.droids.exception.InvalidTaskException;
-import org.apache.droids.helper.factories.ParserFactory;
-import org.apache.droids.helper.factories.ProtocolFactory;
-import org.apache.droids.helper.factories.URLFiltersFactory;
-
-public abstract class CrawlingDroid extends AbstractDroid<Link>
-{
-
- private Collection<String> initialLocations;
- ProtocolFactory protocolFactory;
- ParserFactory parserFactory;
- URLFiltersFactory filtersFactory;
- private TaskValidator<Link> linkValidator;
-
- public CrawlingDroid(Queue<Link> queue, TaskMaster<Link> taskMaster)
- {
- super(queue, taskMaster);
- }
-
- public void setInitialLocations(Collection<String> initialLocations)
- {
- this.initialLocations = initialLocations;
- }
-
- @Override
- public void init() throws InvalidTaskException
- {
- Preconditions.checkState(initialLocations != null, "WebCrawlerDroid requires at least one starting file");
- Preconditions.checkState(!initialLocations.isEmpty(), "WebCrawlerDroid requires at least one starting file");
- for (String location : initialLocations) {
- URI uri;
- try {
- uri = new URI(location);
- } catch (URISyntaxException ex) {
- throw new InvalidTaskException("Invalid lication: " + location);
- }
- queue.offer(new LinkTask(null, uri, 0));
- }
- }
-
- public void start()
- {
- taskMaster.start(queue, this);
- }
-
- @Override
- public void finished()
- {
- log.info("FINISHED!!!");
- }
-
- public abstract Worker<Link> getNewWorker();
-
- public ProtocolFactory getProtocolFactory()
- {
- return protocolFactory;
- }
-
- public void setProtocolFactory(ProtocolFactory protocolFactory)
- {
- this.protocolFactory = protocolFactory;
- }
-
- public ParserFactory getParserFactory()
- {
- return parserFactory;
- }
-
- public void setParserFactory(ParserFactory parserFactory)
- {
- this.parserFactory = parserFactory;
- }
-
- public URLFiltersFactory getFiltersFactory()
- {
- return filtersFactory;
- }
-
- public void setFiltersFactory(URLFiltersFactory filtersFactory)
- {
- this.filtersFactory = filtersFactory;
- }
-
- public void setLinkValidator(TaskValidator<Link> linkValidator)
- {
- this.linkValidator = linkValidator;
- }
-
- public TaskValidator<Link> getLinkValidator()
- {
- return linkValidator;
- }
-
+
+/**
+ * Droid that seeds its queue with a set of start locations and hands the
+ * queue to the task master for processing.
+ */
+public abstract class CrawlingDroid extends AbstractDroid<Link> {
+
+ // Start locations as URI strings; must be set before init() is called.
+ private Collection<String> initialLocations;
+
+ public CrawlingDroid(Queue<Link> queue, TaskMaster<Link> taskMaster) {
+ super(queue, taskMaster);
+ }
+
+ /**
+ * @param initialLocations the URI strings the crawl starts from
+ */
+ public void setInitialLocations(Collection<String> initialLocations) {
+ this.initialLocations = initialLocations;
+ }
+
+ /**
+ * Converts every initial location into a URI and offers a corresponding
+ * {@link LinkTask} to the queue.
+ *
+ * @throws IllegalStateException if no initial locations have been set or
+ * the collection is empty
+ * @throws InvalidTaskException if a location is not a syntactically valid URI
+ */
+ @Override
+ public void init() throws InvalidTaskException {
+ Preconditions.checkState(initialLocations != null,
+ "WebCrawlerDroid requires at least one starting file");
+ Preconditions.checkState(!initialLocations.isEmpty(),
+ "WebCrawlerDroid requires at least one starting file");
+ for (String location : initialLocations) {
+ URI uri;
+ try {
+ uri = new URI(location);
+ } catch (URISyntaxException ex) {
+ throw new InvalidTaskException("Invalid location: " + location);
+ }
+ // Seed task: no originating link; the trailing 0 is presumably the
+ // crawl depth -- TODO confirm against LinkTask.
+ queue.offer(new LinkTask(null, uri, 0));
+ }
+ }
+
+ /**
+ * Starts processing by passing the queue and this droid to the task master.
+ */
+ public void start() {
+ taskMaster.start(queue, this);
+ }
+
+ @Override
+ public void finished() {
+ logger.info("FINISHED!!!");
+ }
+
+ /**
+ * @return the worker used to process a single link
+ */
+ public abstract Worker<Link> getNewWorker();
+
}
Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java
------------------------------------------------------------------------------
svn:mergeinfo =
Copied: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingWorker.java (from r1406628, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java)
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingWorker.java?p2=incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingWorker.java&p1=incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java&r1=1406628&r2=1418399&rev=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingWorker.java Fri Dec 7 17:12:35 2012
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.droids.robot.crawler;
+package org.apache.droids.crawler;
import java.io.IOException;
import java.net.URI;
@@ -22,136 +22,117 @@ import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Map;
-import org.apache.droids.api.ContentEntity;
-import org.apache.droids.api.Link;
-import org.apache.droids.api.ManagedContentEntity;
-import org.apache.droids.api.Parse;
-import org.apache.droids.api.Parser;
-import org.apache.droids.api.Protocol;
-import org.apache.droids.api.Task;
-import org.apache.droids.api.TaskValidator;
-import org.apache.droids.api.Worker;
-import org.apache.droids.exception.DroidsException;
+import org.apache.droids.core.DroidsException;
+import org.apache.droids.core.Protocol;
+import org.apache.droids.core.Task;
+import org.apache.droids.core.Worker;
import org.apache.droids.helper.factories.HandlerFactory;
import org.apache.droids.helper.factories.URLFiltersFactory;
+import org.apache.droids.parse.Parse;
+import org.apache.droids.core.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class CrawlingWorker implements Worker<Link>
-{
+public class CrawlingWorker implements Worker<Link> {
- private static final Logger LOG = LoggerFactory.getLogger(CrawlingWorker.class);
+ private static final Logger LOG = LoggerFactory.getLogger(CrawlingWorker.class);
- private final CrawlingDroid droid;
- HandlerFactory handlerFactory;
-
- public CrawlingWorker( CrawlingDroid droid )
- {
- this.droid = droid;
- }
-
- @Override
- public void execute(Link link) throws DroidsException, IOException
- {
- final String userAgent = this.getClass().getCanonicalName();
- if (LOG.isDebugEnabled()) {
- LOG.debug("Starting " + userAgent);
- }
- URI uri = link.getURI();
- final Protocol protocol = droid.getProtocolFactory().getProtocol(uri);
- if (protocol == null) {
- if (LOG.isWarnEnabled()) {
- LOG.warn("Unsupported protocol scheme '" + uri.getScheme() + "'");
- }
- return;
- }
-
- if (protocol.isAllowed(uri)) {
- if (LOG.isInfoEnabled()) {
- LOG.info("Loading " + uri);
- }
- ManagedContentEntity entity = null;
- try {
- entity = protocol.load(uri);
- } catch(OutOfMemoryError e) {
- LOG.error("Out of memory processing: " + uri + " skipping", e);
- throw new DroidsException(e);
- }
- try {
- String contentType = entity.getMimeType();
- if (LOG.isDebugEnabled()) {
- LOG.debug("Content type " + contentType);
- }
- if (contentType == null){
- LOG.info("Missing content type... can't parse...");
- }
- else {
- Parser parser = droid.getParserFactory().getParser(contentType);
- if( parser == null ) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Could not find parser for " + contentType);
- }
- }
- else {
- Parse parse = parser.parse(entity, link);
- if( parse.getNewTasks() != null && parse.isFollowed() ) {
- Collection<Link> outlinks = getFilteredOutlinks( parse );
- droid.getQueue().addAll( outlinks );
- }
- entity.setParse(parse);
- handle(entity, link);
- }
- }
- } finally {
- entity.finish();
- }
- }
- else {
- if (LOG.isInfoEnabled()) {
- LOG.info("Stopping processing since"
- + " bots are not allowed for " + uri );
- }
- }
- }
-
- protected void handle(ContentEntity entity, Link link)
- throws DroidsException, IOException
- {
- getHandlerFactory().handle(link.getURI(), entity);
- }
-
- protected Collection<Link> getFilteredOutlinks( Parse parse )
- {
- URLFiltersFactory filters = droid.getFiltersFactory();
- TaskValidator< Link > linkValidator = droid.getLinkValidator();
-
- // TODO -- make the hashvalue for Outlink...
- Map<String,Link> filtered = new LinkedHashMap<String,Link>();
- for( Task outTask : parse.getNewTasks() ) {
- // only use Links, so if for some reason it isn't a Link, skip
- if( !(outTask instanceof Link)) {
- continue;
- }
- Link outlink = (Link)outTask;
- String id = outlink.getId();
- if (filters.accept(id) && !filtered.containsKey(id)) {
- if( linkValidator == null ){
- filtered.put(id,outlink);
- }
- else if( linkValidator.validate( outlink ) ){
- filtered.put(id,outlink);
- }
- }
- }
- return filtered.values();
- }
-
- public HandlerFactory getHandlerFactory() {
- return handlerFactory;
- }
-
- public void setHandlerFactory(HandlerFactory handlerFactory) {
- this.handlerFactory = handlerFactory;
- }
-}
+ private final CrawlingDroid droid;
+ HandlerFactory handlerFactory;
+ public CrawlingWorker(CrawlingDroid droid) {
+ this.droid = droid;
+ }
+
+ /**
+ * Processes a single link: resolves the protocol for its URI, checks that
+ * the protocol allows the URI, and then runs the parse/handle steps.
+ *
+ * NOTE(review): loading and parsing are currently commented out, so no
+ * content is fetched and no Parse result exists yet.
+ *
+ * @param link the link to process
+ * @throws DroidsException on processing errors
+ * @throws IOException on I/O errors raised by the handlers
+ */
+ @Override
+ public void execute(Link link) throws DroidsException, IOException {
+ final String userAgent = this.getClass().getCanonicalName();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Starting " + userAgent);
+ }
+ URI uri = link.getURI();
+ final Protocol protocol = droid.getProtocolFactory().getProtocol(uri);
+ if (protocol == null) {
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Unsupported protocol scheme '" + uri.getScheme() + "'");
+ }
+ return;
+ }
+
+ if (protocol.isAllowed(uri)) {
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Loading " + uri);
+ }
+// ContentEntity entity = null;
+ try {
+// entity = protocol.load(uri);
+ } catch (OutOfMemoryError e) {
+ LOG.error("Out of memory processing: " + uri + " skipping", e);
+ throw new DroidsException(e);
+ }
+ try {
+// String contentType = entity.getMimeType();
+ String contentType = "";
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Content type " + contentType);
+ }
+ if (contentType == null) {
+ LOG.info("Missing content type... can't parse...");
+ } else {
+ Parser parser = droid.getParserFactory().getParser(contentType);
+ if (parser == null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Could not find parser for " + contentType);
+ }
+ } else {
+// Parse parse = parser.parse(entity, link);
+ Parse parse = null;
+ // parse stays null while the parser call above is disabled; guard
+ // the dereference so the worker does not fail with a
+ // NullPointerException before the handlers run.
+ if (parse != null && parse.getNewTasks() != null && parse.isFollowed()) {
+ Collection<Link> outlinks = getFilteredOutlinks(parse);
+ droid.getQueue().addAll(outlinks);
+ }
+// entity.setParse(parse);
+ handle(link);
+ }
+ }
+ } finally {
+// entity.finish();
+ }
+ } else {
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Stopping processing since" + " bots are not allowed for " + uri);
+ }
+ }
+ }
+
+ /**
+ * Passes the task to the configured handler factory.
+ *
+ * @param task the task to handle
+ */
+ protected void handle(Task task) throws DroidsException, IOException {
+ getHandlerFactory().handle(task);
+ }
+
+ /**
+ * Collects the new tasks of a parse result that are links, are accepted by
+ * the droid's filters, and have a URI not seen earlier in the same result.
+ *
+ * @param parse the parse result providing the new tasks
+ * @return the accepted links, one per URI, in encounter order
+ */
+ protected Collection<Link> getFilteredOutlinks(Parse parse) {
+ URLFiltersFactory filters = droid.getFiltersFactory();
+
+ // TODO -- make the hashvalue for Outlink...
+ Map<URI, Link> filtered = new LinkedHashMap<URI, Link>();
+ for (Task outTask : parse.getNewTasks()) {
+ // only use Links, so if for some reason it isn't a Link, skip
+ if (!(outTask instanceof Link)) {
+ continue;
+ }
+ Link outlink = (Link) outTask;
+ URI uri = outlink.getURI();
+ if (filters.accept(outlink) && !filtered.containsKey(uri)) {
+ filtered.put(uri, outlink);
+ }
+ }
+ return filtered.values();
+ }
+
+ public HandlerFactory getHandlerFactory() {
+ return handlerFactory;
+ }
+
+ public void setHandlerFactory(HandlerFactory handlerFactory) {
+ this.handlerFactory = handlerFactory;
+ }
+}
Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingWorker.java
------------------------------------------------------------------------------
svn:eol-style = native
Copied: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/Link.java (from r1406628, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/api/Link.java)
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/Link.java?p2=incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/Link.java&p1=incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/api/Link.java&r1=1406628&r2=1418399&rev=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/api/Link.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/Link.java Fri Dec 7 17:12:35 2012
@@ -14,12 +14,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.droids.api;
+package org.apache.droids.crawler;
import java.net.URI;
import java.util.Collection;
import java.util.Date;
+import org.apache.droids.core.Task;
+
/**
* Simple extension of a {@link Task}. Adding from/to link, anchor text
@@ -30,11 +32,6 @@ import java.util.Date;
*/
public interface Link extends Task {
/**
- * @return the URI to this link
- */
- URI getURI();
-
- /**
* @return the Anchor text for this link
*/
String getAnchorText();
Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/Link.java
------------------------------------------------------------------------------
svn:eol-style = native
Copied: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/LinkTask.java (from r1406628, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/LinkTask.java)
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/LinkTask.java?p2=incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/LinkTask.java&p1=incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/LinkTask.java&r1=1406628&r2=1418399&rev=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/LinkTask.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/LinkTask.java Fri Dec 7 17:12:35 2012
@@ -14,180 +14,182 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.droids;
+package org.apache.droids.crawler;
import java.net.URI;
import java.util.Collection;
import java.util.Date;
+import java.io.IOException;
+import java.io.InputStream;
import java.io.Serializable;
-import org.apache.droids.api.Link;
/**
*
- * Basic implementation for @Link.
- * LinkTasks are working instructions for URI based droids.
+ * Basic implementation of {@link Link}. LinkTasks are working instructions for
+ * URI-based droids.
*
*/
public class LinkTask implements Link, Serializable {
- private static final long serialVersionUID = -44808094386453088L;
+ private static final long serialVersionUID = -44808094386453088L;
- private Date started;
- private final int depth;
- private final URI uri;
- private final Link from;
-
- private Date lastModifedDate;
- private Collection<URI> linksTo;
- private String anchorText;
- private int weight;
- private boolean aborted = false;
-
- /**
- * Creates a new LinkTask.
- *
- * @param from
- * @param uri
- * @param depth
- */
- public LinkTask(Link from, URI uri, int depth) {
- this.from = from;
- this.uri = uri;
- this.depth = depth;
- this.started = new Date();
- }
-
- /**
- * Creates a new LinkTask.
- *
- * @param from
- * @param uri
- * @param depth
- * @param weight
- */
- public LinkTask(Link from, URI uri, int depth, int weight) {
- this.from = from;
- this.uri = uri;
- this.depth = depth;
- this.started = new Date();
- this.weight = weight;
- }
-
- /**
- * Creates a new LinkTask
- *
- * @param from
- * @param uri
- * @param depth
- * @param anchorText
- */
- public LinkTask(Link from, URI uri, int depth, String anchorText) {
- this(from, uri, depth);
- this.anchorText = anchorText;
- }
-
- @Override
- public String getId() {
- return uri.toString();
- }
-
- @Override
- public Date getTaskDate() {
- return started;
- }
-
- /**
- * Set the Date the task started.
- *
- * @param started
- */
- public void setTaskDate(Date started) {
- this.started = started;
- }
-
- @Override
- public int getDepth() {
- return depth;
- }
-
- @Override
- public Link getFrom() {
- return from;
- }
-
- @Override
- public Collection<URI> getTo() {
- return linksTo;
- }
-
- @Override
- public Date getLastModifiedDate() {
- return lastModifedDate;
- }
-
- /**
- * Set the Date the Task object was last modified.
- *
- * @param lastModifedDate
- */
- public void setLastModifedDate(Date lastModifedDate) {
- this.lastModifedDate = lastModifedDate;
- }
-
- /**
- * Set Outgoing links.
- *
- * @param linksTo
- */
- public void setLinksTo(Collection<URI> linksTo) {
- this.linksTo = linksTo;
- }
-
- @Override
- public URI getURI() {
- return uri;
- }
-
- @Override
- public String getAnchorText() {
- return anchorText;
- }
-
- /**
- * Set the anchor text for this link.
- *
- * @param anchorText
- */
- public void setAnchorText(String anchorText) {
- this.anchorText = anchorText;
- }
-
- /**
- * Get the weight of the link
- *
- * @return the links weight
- */
- public int getWeight() {
- return weight;
- }
-
- /**
- * Set the weight of the link.
- *
- * @param weight
- */
- public void setWeight(int weight) {
- this.weight = weight;
- }
-
- @Override
- public void abort() {
- aborted = true;
- }
-
- @Override
- public boolean isAborted() {
- return aborted;
- }
+ // Initialised to the construction time; can be replaced via setTaskDate().
+ private Date started;
+ private final int depth;
+ private final URI uri;
+ private final Link from;
+
+ // The following fields keep their Java defaults (null / 0) until a setter
+ // or one of the extended constructors assigns them.
+ private Date lastModifiedDate;
+ private Collection<URI> linksTo;
+ private String anchorText;
+ private int weight;
+ // NOTE(review): plain (non-volatile) flag; if abort() is called from a
+ // different thread than the one polling isAborted(), visibility is not
+ // guaranteed -- confirm how callers use it.
+ private boolean aborted = false;
+
+ /**
+ * Creates a new LinkTask whose task date is the moment of construction.
+ *
+ * @param from
+ * the link returned by {@link #getFrom()}
+ * @param uri
+ * the URI returned by {@link #getURI()}
+ * @param depth
+ * the depth returned by {@link #getDepth()}
+ */
+ public LinkTask(Link from, URI uri, int depth) {
+ this.from = from;
+ this.uri = uri;
+ this.depth = depth;
+ this.started = new Date();
+ }
+
+ /**
+ * Creates a new LinkTask with an explicit weight.
+ *
+ * @param from
+ * the link returned by {@link #getFrom()}
+ * @param uri
+ * the URI returned by {@link #getURI()}
+ * @param depth
+ * the depth returned by {@link #getDepth()}
+ * @param weight
+ * the weight returned by {@link #getWeight()}
+ */
+ public LinkTask(Link from, URI uri, int depth, int weight) {
+ // Delegate the common initialisation, as the anchor-text constructor does.
+ this(from, uri, depth);
+ this.weight = weight;
+ }
+
+ /**
+ * Creates a new LinkTask with an anchor text.
+ *
+ * @param from
+ * the link returned by {@link #getFrom()}
+ * @param uri
+ * the URI returned by {@link #getURI()}
+ * @param depth
+ * the depth returned by {@link #getDepth()}
+ * @param anchorText
+ * the text returned by {@link #getAnchorText()}
+ */
+ public LinkTask(Link from, URI uri, int depth, String anchorText) {
+ this(from, uri, depth);
+ this.anchorText = anchorText;
+ }
+
+ @Override
+ public URI getURI() {
+ return uri;
+ }
+
+ /**
+ * Not implemented yet: this stub always returns {@code null}.
+ *
+ * NOTE(review): callers must be prepared for a null stream until this is
+ * implemented.
+ */
+ @Override
+ public InputStream getContent() throws IOException {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public Date getTaskDate() {
+ return started;
+ }
+
+ /**
+ * Set the Date the task started.
+ *
+ * @param started
+ * the new task date; replaces the construction-time value
+ */
+ public void setTaskDate(Date started) {
+ this.started = started;
+ }
+
+ @Override
+ public int getDepth() {
+ return depth;
+ }
+
+ @Override
+ public Link getFrom() {
+ return from;
+ }
+
+ @Override
+ public Collection<URI> getTo() {
+ return linksTo;
+ }
+
+ @Override
+ public Date getLastModifiedDate() {
+ return lastModifiedDate;
+ }
+
+ /**
+ * Set the Date the Task object was last modified.
+ *
+ * @param lastModifiedDate
+ * the date returned by {@link #getLastModifiedDate()}
+ */
+ public void setLastModifiedDate(Date lastModifiedDate) {
+ this.lastModifiedDate = lastModifiedDate;
+ }
+
+ /**
+ * Set Outgoing links.
+ *
+ * @param linksTo
+ * the URIs returned by {@link #getTo()}
+ */
+ public void setLinksTo(Collection<URI> linksTo) {
+ this.linksTo = linksTo;
+ }
+
+ @Override
+ public String getAnchorText() {
+ return anchorText;
+ }
+
+ /**
+ * Set the anchor text for this link.
+ *
+ * @param anchorText
+ * the text returned by {@link #getAnchorText()}
+ */
+ public void setAnchorText(String anchorText) {
+ this.anchorText = anchorText;
+ }
+
+ /**
+ * Get the weight of the link
+ *
+ * @return the link's weight; 0 unless set via constructor or setter
+ */
+ public int getWeight() {
+ return weight;
+ }
+
+ /**
+ * Set the weight of the link.
+ *
+ * @param weight
+ * the weight returned by {@link #getWeight()}
+ */
+ public void setWeight(int weight) {
+ this.weight = weight;
+ }
+
+ /**
+ * Marks this task as aborted; the flag is never reset by this class.
+ */
+ @Override
+ public void abort() {
+ aborted = true;
+ }
+
+ @Override
+ public boolean isAborted() {
+ return aborted;
+ }
 }
\ No newline at end of file
Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/LinkTask.java
------------------------------------------------------------------------------
svn:eol-style = native
Copied: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/ReportCrawlingDroid.java (from r1406628, incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/robot/crawler/ReportCrawlingDroid.java)
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/ReportCrawlingDroid.java?p2=incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/ReportCrawlingDroid.java&p1=incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/robot/crawler/ReportCrawlingDroid.java&r1=1406628&r2=1418399&rev=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/robot/crawler/ReportCrawlingDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/ReportCrawlingDroid.java Fri Dec 7 17:12:35 2012
@@ -16,12 +16,11 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.droids.robot.crawler;
+package org.apache.droids.crawler;
import java.util.Queue;
-import org.apache.droids.api.Link;
-import org.apache.droids.api.TaskMaster;
-import org.apache.droids.api.Worker;
+import org.apache.droids.core.TaskMaster;
+import org.apache.droids.core.Worker;
import org.apache.droids.handle.ReportHandler;
import org.apache.droids.helper.factories.HandlerFactory;
Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/ReportCrawlingDroid.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/ReportCrawlingDroid.java
------------------------------------------------------------------------------
svn:keywords = Author Date Id Revision
Propchange: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/ReportCrawlingDroid.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/AdvancedHttpContentEntity.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/AdvancedHttpContentEntity.java?rev=1418399&r1=1406628&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/AdvancedHttpContentEntity.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/AdvancedHttpContentEntity.java Fri Dec 7 17:12:35 2012
@@ -23,7 +23,7 @@ import java.util.HashMap;
import java.util.Map;
import java.util.Set;
-import org.apache.droids.api.AdvancedManagedContentEntity;
+import org.apache.droids.core.AdvancedManagedContentEntity;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
Modified: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/AdvancedHttpProtocol.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/AdvancedHttpProtocol.java?rev=1418399&r1=1406628&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/AdvancedHttpProtocol.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/AdvancedHttpProtocol.java Fri Dec 7 17:12:35 2012
@@ -21,7 +21,7 @@ package org.apache.droids.protocol.http;
import java.io.IOException;
import java.net.URI;
-import org.apache.droids.api.AdvancedManagedContentEntity;
+import org.apache.droids.core.AdvancedManagedContentEntity;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
Modified: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/HttpContentEntity.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/HttpContentEntity.java?rev=1418399&r1=1406628&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/HttpContentEntity.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/HttpContentEntity.java Fri Dec 7 17:12:35 2012
@@ -20,8 +20,8 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.Locale;
-import org.apache.droids.api.ManagedContentEntity;
-import org.apache.droids.api.Parse;
+import org.apache.droids.core.ManagedContentEntity;
+import org.apache.droids.parse.Parse;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.HttpEntity;
Modified: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/HttpProtocol.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/HttpProtocol.java?rev=1418399&r1=1406628&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/HttpProtocol.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/protocol/http/HttpProtocol.java Fri Dec 7 17:12:35 2012
@@ -20,8 +20,8 @@ import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
-import org.apache.droids.api.ManagedContentEntity;
-import org.apache.droids.api.Protocol;
+import org.apache.droids.core.ManagedContentEntity;
+import org.apache.droids.core.Protocol;
import org.apache.droids.norobots.ContentLoader;
import org.apache.droids.norobots.NoRobotClient;
import org.apache.droids.norobots.NoRobotException;
Modified: incubator/droids/branches/0.2.x-cleanup/droids-spring/src/main/resources/org/apache/droids/dynamic/droids-core-context.xml
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-spring/src/main/resources/org/apache/droids/dynamic/droids-core-context.xml?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-spring/src/main/resources/org/apache/droids/dynamic/droids-core-context.xml (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-spring/src/main/resources/org/apache/droids/dynamic/droids-core-context.xml Fri Dec 7 17:12:35 2012
@@ -58,7 +58,7 @@
<property name="protocolFactory" ref="org.apache.droids.helper.factories.ProtocolFactory"/>
<property name="parserFactory" ref="org.apache.droids.helper.factories.ParserFactory"/>
- <property name="filtersFactory" ref="org.apache.droids.helper.factories.URLFiltersFactory"/>
+ <property name="filtersFactory" ref="org.apache.droids.helper.factories.FilterFactory"/>
</bean-->
<!-- Queue -->
<bean id="java.util.LinkedList"
Modified: incubator/droids/branches/0.2.x-cleanup/droids-spring/src/main/resources/org/apache/droids/dynamic/droids-core-factories-context.xml
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-spring/src/main/resources/org/apache/droids/dynamic/droids-core-factories-context.xml?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-spring/src/main/resources/org/apache/droids/dynamic/droids-core-factories-context.xml (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-spring/src/main/resources/org/apache/droids/dynamic/droids-core-factories-context.xml Fri Dec 7 17:12:35 2012
@@ -30,7 +30,7 @@
<property name="parserFactory"
ref="org.apache.droids.helper.factories.ParserFactory"/>
<property name="filtersFactory"
- ref="org.apache.droids.helper.factories.URLFiltersFactory"/>
+ ref="org.apache.droids.helper.factories.FilterFactory"/>
<property name="handlerFactory"
ref="org.apache.droids.helper.factories.HandlerFactory"/>
</bean>
@@ -62,8 +62,8 @@
</property>
</bean>
- <bean id="org.apache.droids.helper.factories.URLFiltersFactory"
- class="org.apache.droids.helper.factories.URLFiltersFactory">
+ <bean id="org.apache.droids.helper.factories.FilterFactory"
+ class="org.apache.droids.helper.factories.FilterFactory">
<property name="map">
<configurator:bean-map
type="org.apache.droids.api.URLFilter" check-parent="true"
Modified: incubator/droids/branches/0.2.x-cleanup/droids-spring/src/test/resources/droids-core-test-context.xml
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-spring/src/test/resources/droids-core-test-context.xml?rev=1418399&r1=1418398&r2=1418399&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-spring/src/test/resources/droids-core-test-context.xml (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-spring/src/test/resources/droids-core-test-context.xml Fri Dec 7 17:12:35 2012
@@ -59,7 +59,7 @@
<property name="protocolFactory" ref="org.apache.droids.helper.factories.ProtocolFactory"/>
<property name="parserFactory" ref="org.apache.droids.helper.factories.ParserFactory"/>
- <property name="filtersFactory" ref="org.apache.droids.helper.factories.URLFiltersFactory"/>
+ <property name="filtersFactory" ref="org.apache.droids.helper.factories.FilterFactory"/>
</bean>
<!-- Queue -->
<bean id="java.util.LinkedList"