You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by mi...@apache.org on 2009/09/01 22:11:33 UTC

svn commit: r810273 [1/4] - in /incubator/droids/trunk/droids-crawler: ./ docs/ docs/diagrams/ src/ src/main/ src/main/groovy/ src/main/java/ src/main/java/org/ src/main/java/org/apache/ src/main/java/org/apache/droids/ src/main/java/org/apache/droids/...

Author: mingfai
Date: Tue Sep  1 22:11:29 2009
New Revision: 810273

URL: http://svn.apache.org/viewvc?rev=810273&view=rev
Log:
imported initial droids-crawler

Added:
    incubator/droids/trunk/droids-crawler/README.txt
    incubator/droids/trunk/droids-crawler/docs/
    incubator/droids/trunk/droids-crawler/docs/diagrams/
    incubator/droids/trunk/droids-crawler/docs/diagrams/concepts.png   (with props)
    incubator/droids/trunk/droids-crawler/docs/diagrams/concepts.vsd   (with props)
    incubator/droids/trunk/droids-crawler/docs/diagrams/uml.mdzip   (with props)
    incubator/droids/trunk/droids-crawler/droids-crawler.iml
    incubator/droids/trunk/droids-crawler/pom.xml
    incubator/droids/trunk/droids-crawler/src/
    incubator/droids/trunk/droids-crawler/src/main/
    incubator/droids/trunk/droids-crawler/src/main/groovy/
    incubator/droids/trunk/droids-crawler/src/main/java/
    incubator/droids/trunk/droids-crawler/src/main/java/org/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Droid.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Entity.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Task.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/TaskMaster.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Worker.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultRejectedExecutionHandler.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultThreadFactory.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerController.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerService.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerWorker.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerController.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerException.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerService.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerWorker.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/Link.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/LocalCrawlerService.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/RoundRobinCrawlerService.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/StandaloneCrawlerController.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/extractor/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/extractor/AbstractExtractor.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/extractor/ExtractFilter.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/extractor/Extractor.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/extractor/ExtractorException.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/extractor/HtmlElementLinkExtractor.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/AbstractFetcher.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/DefaultFetcherFactory.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/Fetcher.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/FetcherException.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/FetcherFactory.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/HttpFetcher.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/appengine/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/appengine/AppEngineFetcher.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/delay/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/delay/Delay.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/http/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/http/CrawlerHttpClient.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/http/HttpHeaderSupport.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/DepthFilter.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/FetchFilter.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/Filter.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/FilterSupport.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/LinkFilter.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/ParseFilter.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/StateFilter.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/StatsFilter.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/extract/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/extract/IncludeFilter.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/extract/RefererFilter.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/fetch/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/fetch/HttpHeaderFilter.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/fetch/LinkAttributeFilter.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/link/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/link/NoRepeatFilter.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/AbstractParser.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/DefaultParserFactory.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/Parser.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/ParserException.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/ParserFactory.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/AbstractHierarchicalDataParser.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/JerichoHtmlParser.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/NekoHtmlParser.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/SAXContentHandler.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/SAXElementParser.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/StAXElementParser.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/CrawlerExecutorService.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/DefaultLinkQueue.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/LinkMatcher.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/LinkNormalizer.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/ParamUtils.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/URIResolver.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/WeightComparator.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/Weighted.java
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/logging/
    incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/logging/LogUtils.java
    incubator/droids/trunk/droids-crawler/src/main/resources/
    incubator/droids/trunk/droids-crawler/src/main/resources/log4j.xml
    incubator/droids/trunk/droids-crawler/src/main/resources/sample-crawler-service-cloud.xml
    incubator/droids/trunk/droids-crawler/src/test/
    incubator/droids/trunk/droids-crawler/src/test/groovy/
    incubator/droids/trunk/droids-crawler/src/test/groovy/org/
    incubator/droids/trunk/droids-crawler/src/test/groovy/org/apache/
    incubator/droids/trunk/droids-crawler/src/test/groovy/org/apache/droids/
    incubator/droids/trunk/droids-crawler/src/test/groovy/org/apache/droids/crawler/
    incubator/droids/trunk/droids-crawler/src/test/java/
    incubator/droids/trunk/droids-crawler/src/test/java/org/
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/DependencyTest.java
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/DummyCrawlerService.java
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/LinkTest.java
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/LocalCrawlerServiceTest.java
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/RoundRobinCrawlerServiceTest.java
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/SimpleCrawlerControllerTest.java
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/extractor/
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/extractor/HtmlElementLinkExtractorTest.java
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/fetcher/
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/fetcher/HttpFetcherTest.java
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/fetcher/appengine/
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/fetcher/appengine/AppEngineFetcherTest.java
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/filter/
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/filter/StatsFilterTest.java
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/parser/
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/parser/DefaultParserFactoryTest.java
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/parser/impl/
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/parser/impl/NekoHtmlParserTest.java
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/util/
    incubator/droids/trunk/droids-crawler/src/test/java/org/apache/droids/crawler/util/ParamUtilsTest.java
    incubator/droids/trunk/droids-crawler/src/test/resources/
    incubator/droids/trunk/droids-crawler/src/test/resources/log4j.xml
    incubator/droids/trunk/droids-crawler/src/test/resources/test-LocalCrawlerServiceTest.xml
    incubator/droids/trunk/droids-crawler/src/test/resources/test-RoundRobinCrawlerServiceTest.xml
    incubator/droids/trunk/droids-crawler/src/test/resources/test-StandaloneCrawlerControllerTest.xml
    incubator/droids/trunk/droids-crawler/src/test/resources/test-extractor.HtmlElementLinkExtractorTest.xml
    incubator/droids/trunk/droids-crawler/src/test/resources/test-fetcher.HttpFetcherTest.xml
    incubator/droids/trunk/droids-crawler/src/test/resources/test-parser.DefaultParserFactoryTest.xml
    incubator/droids/trunk/droids-crawler/src/test/resources/test-parser.impl.NekoHtmlParserTest.xml
    incubator/droids/trunk/droids-crawler/src/test/resources/test-util.ParamUtilsTest.xml

Added: incubator/droids/trunk/droids-crawler/README.txt
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/README.txt?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/README.txt (added)
+++ incubator/droids/trunk/droids-crawler/README.txt Tue Sep  1 22:11:29 2009
@@ -0,0 +1 @@
+droids-crawler is under development. The current trunk release is not integrated with droids-core yet.
\ No newline at end of file

Added: incubator/droids/trunk/droids-crawler/docs/diagrams/concepts.png
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/docs/diagrams/concepts.png?rev=810273&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/droids/trunk/droids-crawler/docs/diagrams/concepts.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/droids/trunk/droids-crawler/docs/diagrams/concepts.vsd
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/docs/diagrams/concepts.vsd?rev=810273&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/droids/trunk/droids-crawler/docs/diagrams/concepts.vsd
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/droids/trunk/droids-crawler/docs/diagrams/uml.mdzip
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/docs/diagrams/uml.mdzip?rev=810273&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/droids/trunk/droids-crawler/docs/diagrams/uml.mdzip
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/droids/trunk/droids-crawler/droids-crawler.iml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/droids-crawler.iml?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/droids-crawler.iml (added)
+++ incubator/droids/trunk/droids-crawler/droids-crawler.iml Tue Sep  1 22:11:29 2009
@@ -0,0 +1,62 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module relativePaths="true" MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
+  <component name="FacetManager">
+    <facet type="Groovy" name="Groovy">
+      <configuration compile="true" />
+    </facet>
+    <facet type="Spring" name="Spring">
+      <configuration />
+    </facet>
+  </component>
+  <component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_5" inherit-compiler-output="false">
+    <output url="file://$MODULE_DIR$/target/classes" />
+    <output-test url="file://$MODULE_DIR$/target/test-classes" />
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/src/main/groovy" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/main/resources" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/test/groovy" isTestSource="true" />
+      <sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
+      <sourceFolder url="file://$MODULE_DIR$/src/test/resources" isTestSource="true" />
+      <excludeFolder url="file://$MODULE_DIR$/target" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+    <orderEntry type="module" module-name="droids-core" exported="" />
+    <orderEntry type="module" module-name="droids-norobots" exported="" />
+    <orderEntry type="library" exported="" name="Maven: commons-logging:commons-logging:1.1.1" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.apache.httpcomponents:httpclient:4.0-beta2" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.apache.httpcomponents:httpcore:4.0" level="project" />
+    <orderEntry type="library" exported="" name="Maven: commons-codec:commons-codec:1.3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: nekohtml:nekohtml:1.9.6.2" level="project" />
+    <orderEntry type="library" exported="" name="Maven: xerces:xercesImpl:2.8.1" level="project" />
+    <orderEntry type="library" name="Maven: log4j:log4j:1.2.15" level="project" />
+    <orderEntry type="library" exported="" name="Maven: commons-io:commons-io:1.4" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.codehaus.groovy:groovy-all:1.6.3" level="project" />
+    <orderEntry type="library" name="Maven: junit:junit:4.5" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.apache.ant:ant:1.7.1" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.apache.ant:ant-launcher:1.7.1" level="project" />
+    <orderEntry type="library" exported="" name="Maven: jline:jline:0.9.94" level="project" />
+    <orderEntry type="library" exported="" name="Maven: net.sourceforge.nekohtml:nekohtml:1.9.12" level="project" />
+    <orderEntry type="library" exported="" name="Maven: net.htmlparser.jericho:jericho-html:3.1" level="project" />
+    <orderEntry type="library" exported="" name="Maven: net.jcip:jcip-annotations:1.0" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-core:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-asm:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-web:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: com.caucho:hessian:3.1.5" level="project" />
+    <orderEntry type="library" exported="" name="Maven: com.caucho:burlap:2.1.12" level="project" />
+    <orderEntry type="library" exported="" name="Maven: aopalliance:aopalliance:1.0" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-aop:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-beans:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-context:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-expression:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.antlr:antlr:3.0.1" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.antlr:stringtemplate:3.1-b1" level="project" />
+    <orderEntry type="library" exported="" name="Maven: antlr:antlr:2.7.7" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-oxm:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-webmvc:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.aspectj:aspectjweaver:1.6.2" level="project" />
+    <orderEntry type="library" exported="" name="Maven: com.google.appengine:appengine-api-1.0-sdk:1.2.1" level="project" />
+  </component>
+</module>
+

Added: incubator/droids/trunk/droids-crawler/pom.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/pom.xml?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/pom.xml (added)
+++ incubator/droids/trunk/droids-crawler/pom.xml Tue Sep  1 22:11:29 2009
@@ -0,0 +1,276 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+    <modelVersion>4.0.0</modelVersion>
+    <!--<parent>
+        <artifactId>droids</artifactId>
+        <groupId>org.apache.droids</groupId>
+        <version>0.1-incubating-SNAPSHOT</version>
+        <relativePath>../pom.xml</relativePath>
+    </parent>-->
+    <groupId>org.apache.droids</groupId>
+    <version>0.1</version>
+
+    <artifactId>droids-crawler</artifactId>
+    <name>Droids Crawler Components</name>
+    <inceptionYear>2009</inceptionYear>
+    <description>Droids Crawler Components</description>
+    <packaging>jar</packaging>
+
+    <dependencies>
+        <!--  DROIDS DEPENDENCIES -->
+        <!-- TODO: start with no dependency first; include core at a later stage -->
+        <dependency>
+            <groupId>org.apache.droids</groupId>
+            <artifactId>droids-core</artifactId>
+            <version>0.1-incubating-SNAPSHOT</version>
+        </dependency>
+
+        <dependency>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+            <!--TODO: sync with parent, remove this -->
+            <version>1.1.1</version>
+        </dependency>
+        <dependency>
+            <!--TODO: sync with parent, remove this -->
+            <groupId>log4j</groupId>
+            <artifactId>log4j</artifactId>
+            <version>1.2.15</version>
+            <scope>provided</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>javax.mail</groupId>
+                    <artifactId>mail</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>javax.jms</groupId>
+                    <artifactId>jms</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.sun.jdmk</groupId>
+                    <artifactId>jmxtools</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.sun.jmx</groupId>
+                    <artifactId>jmxri</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <!-- TODO sync version -->
+            <groupId>commons-io</groupId>
+            <artifactId>commons-io</artifactId>
+            <version>1.4</version>
+        </dependency>
+        <dependency>
+            <groupId>org.codehaus.groovy</groupId>
+            <artifactId>groovy-all</artifactId>
+            <version>1.6.3</version>
+        </dependency>
+        <dependency>
+            <!--TODO: sync with parent -->
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpcore</artifactId>
+            <version>4.0</version>
+        </dependency>
+        <dependency>
+            <!--TODO: sync with parent -->
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpclient</artifactId>
+            <version>4.0-beta2</version>
+        </dependency>
+        <dependency>
+            <!--TODO: sync with parent -->
+            <groupId>net.sourceforge.nekohtml</groupId>
+            <artifactId>nekohtml</artifactId>
+            <version>1.9.12</version>
+            <!--<scope>compile</scope>-->
+            <exclusions>
+                <exclusion>
+                    <groupId>xml-apis</groupId>
+                    <artifactId>xml-apis</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>net.htmlparser.jericho</groupId>
+            <artifactId>jericho-html</artifactId>
+            <version>3.1</version>
+        </dependency>
+        <dependency>
+            <!-- for the @ThreadSafe annotation -->
+            <groupId>net.jcip</groupId>
+            <artifactId>jcip-annotations</artifactId>
+            <version>1.0</version>
+        </dependency>
+
+
+        <!-- Spring 3.0 -->
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-core</artifactId>
+            <version>3.0.0.M3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-web</artifactId>
+            <version>3.0.0.M3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-webmvc</artifactId>
+            <version>3.0.0.M3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-aop</artifactId>
+            <version>3.0.0.M3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.aspectj</groupId>
+            <artifactId>aspectjweaver</artifactId>
+            <version>1.6.2</version>
+        </dependency>
+
+        <!-- AppEngine -->
+        <dependency>
+            <groupId>com.google.appengine</groupId>
+            <artifactId>appengine-api-1.0-sdk</artifactId>
+            <version>1.2.1</version>
+            <scope>compile</scope>
+        </dependency>
+
+        <!-- test -->
+        <dependency>
+            <!-- TODO sync version -->
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+            <version>4.5</version>
+        </dependency>
+
+    </dependencies>
+
+    <build>
+        <resources>
+            <resource>
+                <filtering>false</filtering>
+                <directory>src/main/resources</directory>
+            </resource>
+            <resource>
+                <filtering>false</filtering>
+                <directory>src/main/java</directory>
+                <includes>
+                    <include>**</include>
+                </includes>
+                <excludes>
+                    <exclude>**/*.java</exclude>
+                </excludes>
+            </resource>
+        </resources>
+        <testResources>
+            <testResource>
+                <filtering>false</filtering>
+                <directory>src/test/java</directory>
+                <includes>
+                    <include>**</include>
+                </includes>
+                <excludes>
+                    <exclude>**/*.java</exclude>
+                </excludes>
+            </testResource>
+            <testResource>
+                <filtering>false</filtering>
+                <directory>src/test/resources</directory>
+                <includes>
+                    <include>*.xml</include>
+                </includes>
+            </testResource>
+        </testResources>
+        <plugins>
+            <plugin>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <configuration>
+                    <source>1.5</source>
+                    <target>1.5</target>
+                </configuration>
+            </plugin>
+            <plugin>
+                <artifactId>maven-antrun-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>compile</id>
+                        <phase>compile</phase>
+                        <configuration>
+                            <tasks>
+                                <taskdef name="groovyc" classname="org.codehaus.groovy.ant.Groovyc">
+                                    <classpath refid="maven.compile.classpath"/>
+                                </taskdef>
+                                <mkdir dir="${project.build.outputDirectory}"/>
+                                <groovyc destdir="${project.build.outputDirectory}"
+                                         srcdir="${basedir}/src/main/groovy/" listfiles="true">
+                                    <classpath refid="maven.compile.classpath"/>
+                                </groovyc>
+                            </tasks>
+                        </configuration>
+                        <goals>
+                            <goal>run</goal>
+                        </goals>
+                    </execution>
+                    <execution>
+                        <id>test-compile</id>
+                        <phase>test-compile</phase>
+                        <configuration>
+                            <tasks>
+                                <taskdef name="groovyc" classname="org.codehaus.groovy.ant.Groovyc">
+                                    <classpath refid="maven.compile.classpath"/>
+                                </taskdef>
+                                <mkdir dir="${project.build.testOutputDirectory}"/>
+                                <groovyc destdir="${project.build.testOutputDirectory}"
+                                         srcdir="${basedir}/src/test/groovy/" listfiles="true">
+                                    <classpath refid="maven.test.classpath"/>
+                                </groovyc>
+                            </tasks>
+                        </configuration>
+                        <goals>
+                            <goal>run</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <version>2.4.3</version>
+                <configuration>
+                    <includes>
+                        <include>**/*Test.java</include>
+                    </includes>
+                    <forkMode>once</forkMode>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
\ No newline at end of file

Added: incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Droid.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Droid.java?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Droid.java (added)
+++ incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Droid.java Tue Sep  1 22:11:29 2009
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.core;
+
+import org.apache.droids.exception.DroidsException;
+
+public interface Droid<T extends Task>{  //TODO use Droid from core and remove this dummy interface
+    /** Initialises this droid; declared to fail with DroidsException. */
+    void init() throws DroidsException;
+    /** Reports whether this droid has been started (presumably via start() - confirm with implementations). */
+    boolean isStarted();
+    /** Starts this droid. */
+    void start();
+    /** Returns a worker. NOTE(review): raw Worker return type; whether each call creates a new instance is up to the implementation. */
+    Worker getNewWorker();
+    /** Returns the task master. NOTE(review): raw TaskMaster return type; T is not referenced by any member of this interface. */
+    TaskMaster getTaskMaster();
+
+}

Added: incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Entity.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Entity.java?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Entity.java (added)
+++ incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Entity.java Tue Sep  1 22:11:29 2009
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.core;
+
+import org.apache.http.Header;
+import org.apache.http.HttpEntity;
+import org.apache.http.message.BasicHeader;
+import org.apache.http.protocol.HTTP;
+import org.apache.http.util.EntityUtils;
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.Log;
+
+import java.io.*;
+
+/**
+ * A serializable, HttpEntity-like content holder (it does not implement HttpEntity itself) that uses a byte[] as its
+ * buffer, supports an additional "charset" field, and may later gain other fields required by the Crawler.
+ * <p/>
+ * It is named Entity rather than HttpEntity to indicate the Crawler may support more than HTTP crawling.
+ * TODO consider to make it extends HashMap like the Link
+ */
+public class Entity implements Serializable{
+    static Log log = LogFactory.getLog(Entity.class);
+    private static final long serialVersionUID = 1L;
+    protected String contentType;
+    protected String contentEncoding;
+    protected String charset;//TODO review this
+    protected final byte[] content;
+    /** Wraps the given bytes as the content; the array reference is stored as-is, not copied. */
+    public Entity(byte[] input){
+        this.content = input;
+    }
+    /** Reads the whole body of httpEntity into a byte[] and copies its Content-Encoding / Content-Type header values (null if absent); charset stays unset. */
+    public Entity(HttpEntity httpEntity) throws IOException{
+        this.content = EntityUtils.toByteArray(httpEntity);
+        this.contentEncoding = httpEntity.getContentEncoding() != null ? httpEntity.getContentEncoding().getValue() : null;
+        this.contentType = httpEntity.getContentType() != null ? httpEntity.getContentType().getValue() : null;
+    }
+
+    /*public Entity(InputStream inputStream, long size) throws IOException{
+        this.httpEntity = new BufferedHttpEntity(new InputStreamEntity(inputStream, size));
+    }*/
+    /** Summarises the content length and the three metadata fields; the content bytes themselves are not printed. */
+    @Override public String toString(){
+        StringBuilder out = new StringBuilder();
+        out.append("Entity ( ");
+        out.append("content.length: ").append(content.length).append(", ");
+        out.append("contentType: ").append(contentType).append(", ");
+        out.append("contentEncoding: ").append(contentEncoding).append(", ");
+        out.append("charset: ").append(charset);
+        out.append(" )");
+        return out.toString();
+    }
+
+    /** Always true: the content is held in memory, so getContent() can hand out a new stream on every call. */
+    public boolean isRepeatable(){return true;}
+    /** Not implemented: always throws UnsupportedOperationException. */
+    public boolean isChunked(){
+        throw new UnsupportedOperationException("unimplemneted yet");
+    }
+    /** Returns the number of bytes in the buffered content. */
+    public long getContentLength(){ return content.length; }
+
+    /*public Header getContentType(){ return new BasicHeader(HTTP.CONTENT_TYPE, contentType); }
+
+    public Header getContentEncoding(){ return new BasicHeader(HTTP.CONTENT_ENCODING, contentEncoding); }*/
+    /** Returns a new stream over the buffered bytes. */
+    public InputStream getContent(){ return new ByteArrayInputStream(this.content); }
+    /** Writes all buffered bytes to outputStream; the stream is neither flushed nor closed here. */
+    public void writeTo(OutputStream outputStream) throws IOException{ outputStream.write(this.content); }
+    /** Always false. */
+    public boolean isStreaming(){ return false; }
+    /** No-op. */
+    public void consumeContent() throws IOException{}
+    /** Returns the stored content type value, or null if none was captured or set. */
+    public String getContentType(){
+        return contentType;
+    }
+    /** Returns the stored content encoding value, or null if none was captured or set. */
+    public String getContentEncoding(){
+        return contentEncoding;
+    }
+    /** Replaces the stored content type value. */
+    public void setContentType(String contentType){
+        this.contentType = contentType;
+    }
+    /** Replaces the stored content encoding value. */
+    public void setContentEncoding(String contentEncoding){
+        this.contentEncoding = contentEncoding;
+    }
+    /** Returns the character set name used by getText(), or null if none was set. */
+    public String getCharset(){
+        return charset;
+    }
+    /** Sets the character set name that getText() uses to decode the content. */
+    public void setCharset(String charset){
+        this.charset = charset;
+    }
+
+    /** Returns the internal content array itself (no defensive copy). */
+    public byte[] getBytes(){
+        return this.content;
+    }
+
+    /**
+     * Decodes the content as text using charset if set, else contentEncoding (previous behaviour), else the platform default; returns null (and logs) if the name is unsupported.
+     */
+    public String getText(){
+        String encoding = getCharset() != null ? getCharset() : getContentEncoding();
+        try{
+            return encoding != null ? new String(this.content, encoding) : new String(this.content);
+        } catch (UnsupportedEncodingException e){
+            log.error("getText() - error - e: " + e, e);
+            return null;
+        }
+    }
+}

Added: incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Task.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Task.java?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Task.java (added)
+++ incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Task.java Tue Sep  1 22:11:29 2009
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.core;
+
+/**
+ * A unit of work that exposes a numeric id. TODO should id be long or String? see http://issues.apache.org/jira/browse/DROIDS-53
+ */
+public interface Task{
+    long getId(); // id of this task; NOTE(review): the uniqueness scope of the id is not specified here
+}

Added: incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/TaskMaster.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/TaskMaster.java?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/TaskMaster.java (added)
+++ incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/TaskMaster.java Tue Sep  1 22:11:29 2009
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.core;
+
+import java.util.Queue;
+import java.util.concurrent.ExecutorService;
+
+
+public interface TaskMaster<T extends Task>{
+    /** Returns the executor service exposed by this task master (presumably the one that runs its workers - confirm). */
+    ExecutorService getExecutorService();
+    /** Processes the tasks held in queue on behalf of droid; blocking and termination behaviour are not specified here. */
+    void processAllTasks(final Queue<T> queue, final Droid<T> droid);
+}

Added: incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Worker.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Worker.java?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Worker.java (added)
+++ incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/Worker.java Tue Sep  1 22:11:29 2009
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.core;
+
+import org.apache.droids.crawler.Link;
+
+public interface Worker<T extends Link> extends Runnable{
+    // Marker interface: declares no members beyond Runnable#run(); the type parameter T is not referenced.
+}

Added: incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultRejectedExecutionHandler.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultRejectedExecutionHandler.java?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultRejectedExecutionHandler.java (added)
+++ incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultRejectedExecutionHandler.java Tue Sep  1 22:11:29 2009
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.core.thread;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.Log;
+
+import java.util.concurrent.RejectedExecutionHandler;
+import java.util.concurrent.ThreadPoolExecutor;
+
+public class DefaultRejectedExecutionHandler implements RejectedExecutionHandler{
+    static Log log = LogFactory.getLog(DefaultRejectedExecutionHandler.class);
+    /** Only records the rejection at WARN level; the runnable is not retried, re-queued or rethrown. */
+    public void rejectedExecution(Runnable r, ThreadPoolExecutor executor){
+        log.warn(new StringBuilder("rejectedExecution() - runnable: ").append(r).append(", threadPoolExecutor: ").append(executor).toString());
+    }
+}

Added: incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultThreadFactory.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultThreadFactory.java?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultThreadFactory.java (added)
+++ incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultThreadFactory.java Tue Sep  1 22:11:29 2009
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.core.thread;
+
+import java.util.concurrent.ThreadFactory;
+import java.util.concurrent.atomic.AtomicInteger;
+
+public class DefaultThreadFactory implements ThreadFactory{
+    final ThreadGroup threadGroup = new ThreadGroup( // parent: the security manager's group if one is installed, else the creating thread's group
+            System.getSecurityManager() == null ? Thread.currentThread().getThreadGroup() : System.getSecurityManager().getThreadGroup(), "droids");
+    AtomicInteger threadNumber = new AtomicInteger(1); // numeric suffix of the next thread name
+    /** Returns a new, unstarted thread named "droids-N" in the "droids" group; N starts at 1 and grows by one per call. */
+    public Thread newThread(Runnable r){
+        final String name = "droids-" + threadNumber.getAndIncrement();
+        return new Thread(threadGroup, r, name, 0);
+    }
+}

Added: incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerController.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerController.java?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerController.java (added)
+++ incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerController.java Tue Sep  1 22:11:29 2009
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.crawler;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.droids.crawler.filter.LinkFilter;
+import org.apache.droids.crawler.util.DefaultLinkQueue;
+import org.apache.droids.exception.DroidsException;
+import org.apache.droids.core.Droid;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.context.ApplicationContext;
+
+import java.io.Serializable;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.*;
+
+/** Base CrawlerController holding the link queue, link filters, optional filter comparator, crawler service and seeds. */
+public abstract class AbstractCrawlerController<T extends Link> implements CrawlerController<T>{
+    protected static final Log log = LogFactory.getLog(AbstractCrawlerController.class);
+    @Autowired protected ApplicationContext applicationContext;
+    protected Queue<T> queue;
+    @Autowired(required = false) protected List<LinkFilter<T>> filters;
+    @Autowired(required = false) @Qualifier("crawler.filterComparator") protected Comparator filterComparator;
+    @Autowired(required = false) protected CrawlerService<T> crawlerService;
+    protected Collection seeds;
+    /** Creates a default queue if none is set, sorts the filters when a comparator is present, and enqueues the seeds (Link, String or URI); other seeds are logged and skipped. */
+    public void init() throws DroidsException{
+        if (queue == null) queue = new DefaultLinkQueue<T>();
+        if (filters != null && filterComparator != null) Collections.sort(filters, filterComparator);
+        if (seeds == null) return;
+        int counter = 0;
+        long current = System.currentTimeMillis();
+        for (Object seed : seeds){
+            Map<String, Serializable> seedLinkData = new HashMap<String, Serializable>(); // fresh map per seed: links must not share one mutable map if Link keeps the reference
+            seedLinkData.put("created", current + counter++);
+            try{
+                if (seed instanceof Link) queue.add((T) seed);
+                else if (seed instanceof String) queue.add((T) new Link((String) seed, seedLinkData));
+                else if (seed instanceof URI) queue.add((T) new Link((URI) seed, seedLinkData));
+                else log.warn("unsupported seed type, skipped - seed: " + seed);
+            } catch (URISyntaxException e){
+                log.error("fail to construct a seed link, skipped - seed: " + seed, e);
+            }
+        }
+    }
+    /** Appends a filter, creating the filter list on first use. */
+    public boolean addFilter(LinkFilter<T> filter){
+        if (this.filters == null) this.filters = new ArrayList<LinkFilter<T>>();
+        return this.filters.add(filter);
+    }
+    /** Removes a filter; returns false when no filter list exists or the filter was not in it. */
+    public boolean removeFilter(LinkFilter<T> filter){
+        return this.filters != null && this.filters.remove(filter);
+    }
+    /** Replaces the filter list (may be null). */
+    public void setFilters(List<LinkFilter<T>> filters){
+        this.filters = filters;
+    }
+    /** Returns the filter list itself (may be null; not a copy). */
+    public List<LinkFilter<T>> getFilters(){
+        return filters;
+    }
+    /** Returns true when at least one filter is registered. */
+    public boolean hasFilter(){
+        return filters != null && filters.size() > 0;
+    }
+    /** Returns the link queue (null until set or until init() has run). */
+    public Queue<T> getQueue(){
+        return queue;
+    }
+    /** Replaces the link queue; init() only creates a default queue when none is set. */
+    public void setQueue(Queue<T> queue){
+        this.queue = queue;
+    }
+    /** Returns the comparator init() uses to sort the filters, or null. */
+    public Comparator getFilterComparator(){
+        return filterComparator;
+    }
+    /** Sets the comparator init() uses to sort the filters. */
+    public void setFilterComparator(Comparator filterComparator){
+        this.filterComparator = filterComparator;
+    }
+    /** Returns the crawler service, or null if none was injected or set. */
+    public CrawlerService<T> getCrawlerService(){
+        return crawlerService;
+    }
+    /** Sets the crawler service. */
+    public void setCrawlerService(CrawlerService<T> crawlerService){
+        this.crawlerService = crawlerService;
+    }
+    /** Returns the seed collection (may be null). */
+    public Collection getSeeds(){
+        return seeds;
+    }
+    /** Sets the seeds that init() turns into queued links; elements may be Link, String or URI. */
+    public void setSeeds(Collection seeds){
+        this.seeds = seeds;
+    }
+}

Added: incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerService.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerService.java?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerService.java (added)
+++ incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerService.java Tue Sep  1 22:11:29 2009
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.crawler;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.droids.crawler.filter.FilterSupport;
+import org.apache.droids.crawler.filter.LinkFilter;
+
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+
+public abstract class AbstractCrawlerService<T extends Link> implements CrawlerService<T>{
+    static Log log = LogFactory.getLog(AbstractCrawlerService.class);
+    /**
+     * Returns the local host name as the node name, or the literal string "null" when the local host cannot be resolved.
+     * Distributed Crawler Service shall override this method if it is not one instance per host.
+     */
+    public String getNode(){
+        try{
+            return InetAddress.getLocalHost().getHostName();
+        } catch (UnknownHostException e){
+            // pass the exception as the Throwable argument so the stack trace is logged as well
+            log.error("getNode() - cannot resolve local host, falling back to \"null\"", e);
+            return "null";
+        }
+    }
+}

Added: incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerWorker.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerWorker.java?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerWorker.java (added)
+++ incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerWorker.java Tue Sep  1 22:11:29 2009
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.crawler;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.droids.core.Worker;
+import org.apache.droids.crawler.filter.FilterSupport;
+import org.apache.droids.crawler.filter.LinkFilter;
+import org.springframework.beans.factory.annotation.Autowired;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+/** Base crawler Worker that relays link life-cycle events (polled, completed, failed) to its LinkFilters and updates the link state. */
+public abstract class AbstractCrawlerWorker<T extends Link> implements Worker, FilterSupport<LinkFilter<T>>{
+    static Log log = LogFactory.getLog(AbstractCrawlerWorker.class);
+    @Autowired(required = false) protected List<LinkFilter<T>> filters;
+    /** Runs every filter's polled() hook in list order, feeding each the previous result, then marks a non-null result POLLED and returns it. */
+    protected T polled(T link){
+        if (hasFilter()){
+            for (LinkFilter<T> filter : getFilters()){
+                link = filter.polled(link); // NOTE(review): the result may be null; later filters then receive null - confirm they tolerate it
+                if (log.isTraceEnabled())
+                    log.trace("polled - filter: " + filter + ", link: " + link);
+            }
+        }
+        if (link != null) link.setState(Link.State.POLLED);
+        return link;
+    }
+    /*public void completed(T link, final Fetcher<T> fetcher, final Parser<T, Object> parser, final List<Extractor<T, ? extends Parser>> extractors, final Set<T> links);
+    public void failed(T link, int stage, Object object);
+    */
+    /** Calls every filter's completed() hook with the link and its outlinks, then marks a non-null link COMPLETED and returns it. */
+    protected T completed(T link, Set<T> outlinks){
+        if (hasFilter()){
+            for (LinkFilter<T> filter : getFilters()){
+                filter.completed(link, outlinks);
+                if (log.isTraceEnabled()) log.trace("completed - filter: " + filter + ", link: " + link);
+            }
+        }
+        if (link != null) link.setState(Link.State.COMPLETED);
+        return link;
+    }
+    /** Calls every filter's failed() hook with the link and the failure object, then marks a non-null link FAILED and returns it. */
+    protected T failed(T link, Object object){
+        if (hasFilter()){
+            for (LinkFilter<T> filter : getFilters()){
+                filter.failed(link, object);
+                if (log.isTraceEnabled()) log.trace("failed - filter: " + filter + ", link: " + link);
+            }
+        }
+        if (link != null) link.setState(Link.State.FAILED);
+        return link;
+    }
+    /** Appends a filter, creating the filter list on first use. */
+    public boolean addFilter(LinkFilter<T> filter){
+        if (filters == null) filters = new ArrayList<LinkFilter<T>>();
+        return filters.add(filter);
+    }
+    /** Removes a filter; returns false (instead of throwing) when no filter list exists or the filter was not in it. */
+    public boolean removeFilter(LinkFilter<T> filter){
+        return filters != null && filters.remove(filter);
+    }
+    /** Replaces the filter list (may be null). */
+    public void setFilters(List<LinkFilter<T>> filters){
+        this.filters = filters;
+    }
+    /** Returns the filter list itself (may be null; not a copy). */
+    public List<LinkFilter<T>> getFilters(){
+        return filters;
+    }
+    /** Returns true when at least one filter is registered. */
+    public boolean hasFilter(){
+        return filters != null && filters.size() > 0;
+    }
+}

Added: incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerController.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerController.java?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerController.java (added)
+++ incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerController.java Tue Sep  1 22:11:29 2009
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.droids.crawler;
+
+import org.apache.droids.core.Droid;
+import org.apache.droids.crawler.filter.FilterSupport;
+import org.apache.droids.crawler.filter.LinkFilter;
+
+import java.util.Queue;
+
+public interface CrawlerController<T extends Link> extends FilterSupport<LinkFilter<T>>{//TODO make it extends Droid
+    /** Returns this controller's link queue. */
+    Queue<T> getQueue();
+    /** Returns this controller's crawler service. */
+    CrawlerService<T> getCrawlerService();
+    /** Starts this controller; declared to fail with CrawlerException. */
+    void start() throws CrawlerException;
+}

Added: incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerException.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerException.java?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerException.java (added)
+++ incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerException.java Tue Sep  1 22:11:29 2009
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.crawler;
+
+/** Checked exception type of the crawler package; the four constructors mirror those of java.lang.Exception. */
+public class CrawlerException extends Exception {
+    public CrawlerException() { super(); } // no detail message, no cause
+    /** Creates an exception with the given detail message and no cause. */
+    public CrawlerException(String message) {
+        super(message);
+    }
+    /** Creates an exception with the given detail message and underlying cause. */
+    public CrawlerException(String message, Throwable cause) {
+        super(message, cause);
+    }
+    /** Creates an exception wrapping cause; the detail message is cause.toString() when cause is non-null. */
+    public CrawlerException(Throwable cause) {
+        super(cause);
+    }
+}

Added: incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerService.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerService.java?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerService.java (added)
+++ incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerService.java Tue Sep  1 22:11:29 2009
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.droids.crawler;
+
+import org.apache.droids.crawler.parser.ParserException;
+import org.apache.droids.crawler.fetcher.FetcherException;
+import org.apache.droids.crawler.extractor.ExtractorException;
+
+/**
+ * Crawler Service is a Facade of Fetcher, Parser and Extractor. It uses {@link Link} (which is a Map) as the main
+ * data container for every step: each step stores its result on the link under the "fetched", "parsed" or
+ * "extracted" attribute and hands the link back to the caller.
+ *
+ * @param <T> the concrete Link type handled by the service
+ */
+public interface CrawlerService<T extends Link>{
+    /**
+     * Constant 1L. NOTE(review): presumably intended as a shared serialVersionUID value - nothing in view uses it,
+     * confirm before relying on that.
+     */
+    long DEFAULT_SERIALVERSIONID = 1L;
+
+    /**
+     * @return the version string of the service implementation
+     */
+    String getVersion();
+
+    /**
+     * @return the node of this service. NOTE(review): presumably identifies the instance/host when several
+     *         services are distributed - confirm against the implementations
+     */
+    String getNode();
+
+    /**
+     * Fetch the link. To specify Fetcher and Parser, use link attributes according to {@link Link}.
+     *
+     * @param link the link to fetch
+     * @return the link with a "fetched" attribute; {@link LocalCrawlerService} stores the
+     *         {@link org.apache.droids.crawler.fetcher.Fetcher} there on success and Boolean false otherwise
+     * @throws FetcherException if the link cannot be fetched
+     */
+    T fetch(T link) throws FetcherException;
+
+    /**
+     * Parse the link, fetching it first when needed ({@link LocalCrawlerService} does so when the link has no
+     * "fetched" attribute). To specify Fetcher and Parser, use link attributes according to {@link Link}.
+     *
+     * @param link the link to parse
+     * @return the link with a "parsed" attribute of {@link org.apache.droids.crawler.parser.Parser} type; in
+     *         {@link LocalCrawlerService} the "fetched" attribute is consumed (removed) by this step
+     * @throws ParserException if the link cannot be parsed
+     */
+    T parse(T link) throws ParserException;
+
+    /**
+     * Extract the outgoing links of the link. {@link LocalCrawlerService} fetches and parses the link first when
+     * it has no "parsed" attribute.
+     *
+     * @param link the link to extract from
+     * @return the link; in {@link LocalCrawlerService} it carries an "extracted" attribute (a Set of links) only
+     *         when at least one outgoing link was found
+     * @throws ExtractorException if extraction, or the implied fetch/parse, fails
+     */
+    T extract(T link) throws ExtractorException;
+
+}

Added: incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerWorker.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerWorker.java?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerWorker.java (added)
+++ incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerWorker.java Tue Sep  1 22:11:29 2009
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.crawler;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+
+import java.util.Queue;
+import java.util.Set;
+
+/**
+ * Worker that takes one link off a queue per {@link #run()} call and hands it to a {@link CrawlerService} for
+ * extraction. The outcome is reported through the completed/failed callbacks, which are not declared in this
+ * class (presumably inherited from AbstractCrawlerWorker).
+ *
+ * @param <T> the concrete Link type processed by the worker
+ */
+public class CrawlerWorker<T extends Link> extends AbstractCrawlerWorker<T>{
+    static Log log = LogFactory.getLog(CrawlerWorker.class);
+
+    /** Source of the links to process. */
+    protected Queue<T> queue;
+
+    /** Service performing the extraction work; injection is optional. */
+    @Autowired(required = false)
+    protected CrawlerService<T> crawlerService;
+
+    /** Counter incremented after each link reported as completed. */
+    protected int count;
+
+    /**
+     * Processes at most one link: polls the queue, asks the crawler service to extract the link and, when the
+     * result carries an "extracted" attribute, removes it and reports the link as completed with those outlinks.
+     * Any exception is logged and reported through failed() together with the link held at that moment (which
+     * can be null, e.g. when polling itself failed).
+     * <p/>
+     * NOTE(review): a link that comes back without "extracted" is reported neither as completed nor as failed -
+     * confirm that this is intended.
+     */
+    public void run(){
+        T current = null;
+        try{
+            current = queue.poll();
+            if (log.isTraceEnabled()){
+                log.trace("run() - polled link: " + current + ", crawlerService: " + crawlerService);
+            }
+            if (current == null){
+                // nothing to do, the queue had no element
+                log.warn("run() - fail to poll any link - queue: " + queue.toString());
+                return;
+            }
+
+            current = crawlerService.extract(current);
+            if (log.isTraceEnabled()){
+                log.trace("run() - extracted - link: " + current);
+            }
+            if (!current.containsKey("extracted")){
+                return;
+            }
+
+            Set<T> outlinks = current.remove("extracted", Set.class);
+            this.completed(current, outlinks);
+            count++;
+        } catch (Exception e){
+            log.error("run() - error", e);
+            this.failed(current, e);
+        }
+    }
+
+    public Queue<T> getQueue(){
+        return this.queue;
+    }
+
+    public void setQueue(Queue<T> queue){
+        this.queue = queue;
+    }
+
+    public CrawlerService<T> getCrawlerService(){
+        return this.crawlerService;
+    }
+
+    public void setCrawlerService(CrawlerService<T> crawlerService){
+        this.crawlerService = crawlerService;
+    }
+
+    public int getCount(){
+        return this.count;
+    }
+
+    public void setCount(int count){
+        this.count = count;
+    }
+}

Added: incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/Link.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/Link.java?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/Link.java (added)
+++ incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/Link.java Tue Sep  1 22:11:29 2009
@@ -0,0 +1,227 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.crawler;
+
+import org.apache.droids.crawler.util.URIResolver;
+import org.apache.droids.crawler.util.logging.LogUtils;
+
+import java.io.Serializable;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Link is the main unit for any crawling process including every step and every filter method.
+ * As a Map, it holds any arbitrary data.
+ * <p/>
+ * By design, any method that operates on a link takes the Link as its first argument.
+ * <p/>
+ * Besides the map content a link has a url, an id derived from the url and a processing {@link State}.
+ */
+public class Link extends HashMap<String, Object> implements Serializable{
+    /** Processing states of a link; a new link starts in {@link #INIT}. */
+    public static enum State{
+        INIT, POLLED, FETCHED, PARSED, EXTRACTED, COMPLETED, FAILED
+    }
+
+    /** Id derived from {@link #url} by {@link #createId(String)}; 0 while no url is set. */
+    protected long id;
+    /** Textual form of the link target; null until a url is assigned. */
+    protected String url;
+    protected State state = State.INIT;
+
+    /** Creates an empty link without url; use {@link #setUrl(String)} to assign one. */
+    public Link(){
+    }
+
+    public Link(URI uri){
+        this(uri, null);
+    }
+
+    /**
+     * @throws URISyntaxException if url is not a valid URI
+     */
+    public Link(String url) throws URISyntaxException{
+        this(new URI(url), null);
+    }
+
+    /**
+     * @throws URISyntaxException if url is not a valid URI
+     */
+    public Link(String url, Map<String, ?> initData) throws URISyntaxException{
+        this(new URI(url), initData);
+    }
+
+    /**
+     * @param uri      target of the link, must not be null
+     * @param initData initial attributes copied into this link, may be null
+     */
+    public Link(URI uri, Map<String, ?> initData){
+        this.url = uri.toString();
+        this.id = createId(this.url);
+        if (initData != null){
+            for (Map.Entry<String, ?> entry : initData.entrySet()) this.put(entry.getKey(), entry.getValue());
+        }
+    }
+
+    /**
+     * Derives the id from the url: the String hash code read as an unsigned 32 bit value, so never negative.
+     */
+    protected long createId(String uri){
+        return uri.hashCode() & 0xFFFFFFFFL;
+    }
+
+    public String toString(){
+        StringBuilder out = new StringBuilder();
+        out.append("Link ( ").append("url: " + LogUtils.toString(url));
+        out.append(", id: " + LogUtils.toString(id));
+        out.append(", state: " + state);
+        if (this.size() > 0) out.append(", data: ").append(LogUtils.toString(this));
+        out.append(" )");
+        return out.toString();
+    }
+
+    /**
+     * Two links are equal when they have the same class, the same map content, the same id and the same url.
+     * The url is compared as well because ids are only 32 bit hashes: distinct urls may share an id, and
+     * {@link #hashCode()} already depends on the url.
+     */
+    @Override
+    public boolean equals(Object o){
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        if (!super.equals(o)) return false;
+
+        Link link = (Link) o;
+
+        if (id != link.id) return false;
+        return url == null ? link.url == null : url.equals(link.url);
+    }
+
+    /** Hash of id and url only; the map content and the state do not contribute. */
+    @Override
+    public int hashCode(){
+        int result = (int) (id ^ (id >>> 32));
+        result = 31 * result + (url != null ? url.hashCode() : 0);
+        return result;
+    }
+
+    /**
+     * Typed convenience variant of {@link #get(Object)}. The class argument only drives type inference; the value
+     * is not checked against it, so a mismatch surfaces as ClassCastException at the caller.
+     */
+    @SuppressWarnings("unchecked")
+    public <T> T get(String key, Class<T> clazz){
+        return (T) this.get(key);
+    }
+
+    /**
+     * Typed convenience variant of {@link #remove(Object)}; like {@link #get(String, Class)} it does not check
+     * the value against the class argument.
+     */
+    @SuppressWarnings("unchecked")
+    public <T> T remove(String key, Class<T> clazz){
+        return (T) this.remove(key);
+    }
+
+    /**
+     * @return the host part of the url, or null when no url is set or it has no host
+     */
+    public String getHost(){
+        URI uri = parseUrl();
+        return uri != null ? uri.getHost() : null;
+    }
+
+    /**
+     * Replaces the url and re-derives the id from it, so that id and url stay consistent.
+     *
+     * @throws URISyntaxException if url is not a valid URI; the link is left unchanged in that case
+     */
+    public void setUrl(String url) throws URISyntaxException{
+        String validated = new URI(url).toString();
+        this.url = validated;
+        this.id = createId(validated);
+    }
+
+    public String getUrl(){
+        return url;
+    }
+
+    /**
+     * Resolves target against the url of this link.
+     *
+     * @return a new plain Link (without the attributes of this one) for the resolved URI, or null when this link
+     *         has no usable url or the resolver yields no result
+     */
+    public Link resolve(String target){
+        if (url == null) return null;
+        try{
+            URI resolved = new URIResolver(new URI(url)).resolve(target);
+            return resolved != null ? new Link(resolved) : null;
+        } catch (URISyntaxException e){
+            // not expected: the url is validated by the constructors and by setUrl()
+            return null;
+        }
+    }
+
+    public long getId(){
+        return id;
+    }
+
+    /**
+     * Not supported; the depth is kept as map attribute, see the exception message.
+     */
+    public int getDepth(){
+        throw new UnsupportedOperationException("please use get(\"depth\")");
+    }
+
+    /**
+     * @return always null in this implementation
+     */
+    public Date getTaskDate(){
+        return null;
+    }
+
+    /**
+     * @return the url as URI, or null when no url is set
+     */
+    public URI getURI(){
+        return parseUrl();
+    }
+
+    public State getState(){
+        return state;
+    }
+
+    public void setState(State state){
+        this.state = state;
+    }
+
+    /** Parses the url; yields null instead of failing when no url is set or it is not a valid URI. */
+    private URI parseUrl(){
+        if (url == null) return null;
+        try{
+            return new URI(url);
+        } catch (URISyntaxException e){
+            // not expected: the url is validated by the constructors and by setUrl()
+            return null;
+        }
+    }
+
+}

Added: incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/LocalCrawlerService.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/LocalCrawlerService.java?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/LocalCrawlerService.java (added)
+++ incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/LocalCrawlerService.java Tue Sep  1 22:11:29 2009
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.crawler;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.droids.crawler.extractor.Extractor;
+import org.apache.droids.crawler.extractor.ExtractorException;
+import org.apache.droids.crawler.fetcher.Fetcher;
+import org.apache.droids.crawler.fetcher.FetcherException;
+import org.apache.droids.crawler.fetcher.FetcherFactory;
+import org.apache.droids.crawler.filter.LinkFilter;
+import org.apache.droids.crawler.filter.FetchFilter;
+import org.apache.droids.crawler.parser.Parser;
+import org.apache.droids.crawler.parser.ParserException;
+import org.apache.droids.crawler.parser.ParserFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+
+import javax.annotation.PostConstruct;
+import java.io.Serializable;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * CrawlerService that performs fetch, parse and extract in the local JVM using the configured FetcherFactory,
+ * ParserFactory and list of Extractors. Intermediate results travel on the link itself as the "fetched",
+ * "parsed" and "extracted" attributes.
+ *
+ * @param <T> the concrete Link type handled by the service
+ */
+public class LocalCrawlerService<T extends Link> extends AbstractCrawlerService<T>{
+    static Log log = LogFactory.getLog(LocalCrawlerService.class);
+    @Autowired(required = false) protected FetcherFactory<T> fetcherFactory;
+    @Autowired(required = false) protected ParserFactory<T> parserFactory;
+    @Autowired(required = false) protected List<Extractor<T, Parser<T, ?>>> extractors;
+
+    /**
+     * Logs the injected collaborators at info level.
+     *
+     * @return this service
+     */
+    @PostConstruct
+    public LocalCrawlerService<T> init(){
+        if (log.isInfoEnabled()){
+            log.info("init() - fetcherFactory: " + fetcherFactory);
+            log.info("\t\tparserFactory: " + parserFactory);
+            log.info("\t\textractors: " + ((extractors != null) ? extractors.size() : -1) + ", extractors: " + extractors);
+        }
+        return this;
+    }
+
+    public String getVersion(){return "0.1";}
+
+    /**
+     * Fetches the link with a Fetcher obtained from the fetcher factory, passing the link's "params" attribute
+     * along. Afterwards the "fetched" attribute holds the Fetcher when its status code is Fetcher.SUCCESS and
+     * Boolean false otherwise.
+     *
+     * @throws FetcherException if no fetcher supports the link or anything fails while fetching; the original
+     *                          failure is kept as cause
+     */
+    public T fetch(T link) throws FetcherException{
+        if (log.isTraceEnabled()) log.trace("fetch() - link: " + link);
+        try{
+            Fetcher fetcher = fetcherFactory.newFetcher(link);
+            if (fetcher == null)
+                throw new FetcherException("no supported fetcher - link: " + link + ", fetcherFactory: " + fetcherFactory);
+            fetcher.fetch(link, link.get("params", Map.class));
+            link.put("fetched", fetcher.getStatusCode() == Fetcher.SUCCESS ? fetcher : false);
+            return link;
+        } catch (Exception e){
+            log.error("fetch() - error - link: " + link + ", error: " + e.getMessage());
+            throw new FetcherException("fail to fetch", e);
+        }
+    }
+
+    /**
+     * Parses the fetched content of the link with a Parser obtained from the parser factory and stores the
+     * Parser as "parsed" attribute. The "fetched" attribute is removed; when it is absent the link is fetched
+     * first.
+     *
+     * @throws ParserException       if the implied fetch fails (the FetcherException is the cause) or - presumably -
+     *                               if the parser itself fails
+     * @throws IllegalStateException if the "fetched" attribute does not hold a Fetcher, i.e. the fetch did not
+     *                               succeed
+     */
+    public T parse(T link) throws ParserException{
+        if (log.isTraceEnabled()) log.trace("parse() - link: " + link);
+        // read the attribute untyped: fetch() stores Boolean false for an unsuccessful fetch
+        Object fetched = link.remove("fetched");
+
+        if (fetched == null){
+            try{
+                fetched = this.fetch(link).remove("fetched");
+            } catch (FetcherException e){
+                throw new ParserException("failed in auto-fetch", e);
+            }
+        }
+        // checked after the try block, not in a finally clause, so that it cannot replace the ParserException above
+        if (!(fetched instanceof Fetcher))
+            throw new IllegalStateException("link is not fetched - link: " + link);
+        Fetcher fetcher = (Fetcher) fetched;
+
+        if (log.isTraceEnabled()) log.trace("parse() - fetcher: " + fetcher + ", parserFactory: " + parserFactory);
+        Parser parser = parserFactory.newParser(link);
+        parser.parse(link, fetcher.getEntity(), null);
+        link.put("parsed", parser);
+        return link;
+    }
+
+    /**
+     * Runs every configured extractor that matches the link and collects the outlinks they return. A link without
+     * "parsed" attribute is parsed first (and fetched before that when it has no "fetched" attribute). The
+     * "parsed" attribute is removed; an "extracted" attribute holding the Set of outlinks is added only when at
+     * least one outlink was found.
+     *
+     * @throws ExtractorException if the implied fetch or parse fails or no parser is available on the link
+     */
+    public T extract(T link) throws ExtractorException{
+        if (log.isTraceEnabled())
+            log.trace("extract() - link: " + link + ", extractors.size(): " + (extractors != null ? extractors.size() : -1) + ", extractors: " + extractors);
+        try{
+            if (!link.containsKey("parsed")){
+                if (!link.containsKey("fetched")) link = fetch(link);
+                link = parse(link);
+            }
+        } catch (FetcherException e){
+            throw new ExtractorException("failed in auto fetch", e);
+        } catch (ParserException e){
+            throw new ExtractorException("failed in auto parse", e);
+        }
+        if (!link.containsKey("parsed")) throw new ExtractorException("fail to auto fetch/parse - link: " + link);
+
+        Set<T> outlinks = new HashSet<T>();
+        Parser<T, ?> parser = link.remove("parsed", Parser.class);
+        if (parser == null) throw new ExtractorException("link must have a \"parsed\" attribute");
+
+        if (extractors == null){
+            log.warn("extract() - no extractor is configured");
+            return link;
+        }
+        for (Extractor<T, Parser<T, ?>> extractor : extractors){
+            if (extractor.matches(link)){
+                Set<T> found = extractor.extract(link, parser);
+                if (found != null) outlinks.addAll(found);
+            }
+        }
+        if (!outlinks.isEmpty()) link.put("extracted", outlinks);
+        return link;
+    }
+
+    public FetcherFactory<T> getFetcherFactory(){
+        return fetcherFactory;
+    }
+
+    public void setFetcherFactory(FetcherFactory<T> fetcherFactory){
+        this.fetcherFactory = fetcherFactory;
+    }
+
+    public ParserFactory<T> getParserFactory(){
+        return parserFactory;
+    }
+
+    public void setParserFactory(ParserFactory<T> parserFactory){
+        this.parserFactory = parserFactory;
+    }
+
+    public List<Extractor<T, Parser<T, ?>>> getExtractors(){
+        return extractors;
+    }
+
+    public void setExtractors(List<Extractor<T, Parser<T, ?>>> extractors){
+        this.extractors = extractors;
+    }
+}

Added: incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/RoundRobinCrawlerService.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/RoundRobinCrawlerService.java?rev=810273&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/RoundRobinCrawlerService.java (added)
+++ incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/RoundRobinCrawlerService.java Tue Sep  1 22:11:29 2009
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.crawler;
+
+import org.springframework.beans.factory.annotation.Autowired;
+import org.apache.droids.crawler.fetcher.FetcherException;
+import org.apache.droids.crawler.parser.ParserException;
+import org.apache.droids.crawler.extractor.ExtractorException;
+
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Delegate to the list of configured CrawlerService.
+ * <p/>
+ * One of the most useful use cases is to configure a Crawler Controller to use a RoundRobinCrawlerService with
+ * multiple remote CrawlerService distributed in the cloud.
+ * <p/>
+ * Every call to fetch, parse or extract advances one shared counter and is sent to the service at position
+ * "counter modulo number of services".
+ */
+public class RoundRobinCrawlerService<T extends Link> extends AbstractCrawlerService<T>{
+    @Autowired(required = false) List<CrawlerService<T>> crawlerServices;
+    protected AtomicInteger counter = new AtomicInteger();
+
+    public String getVersion(){return "0.1";}
+
+    public T fetch(T link) throws FetcherException{
+        return nextCrawlerService().fetch(link);
+    }
+
+    public T parse(T link) throws ParserException{
+        return nextCrawlerService().parse(link);
+    }
+
+    public T extract(T link) throws ExtractorException{
+        return nextCrawlerService().extract(link);
+    }
+
+    public List<CrawlerService<T>> getCrawlerServices(){
+        return crawlerServices;
+    }
+
+    public void setCrawlerServices(List<CrawlerService<T>> crawlerServices){
+        this.crawlerServices = crawlerServices;
+    }
+
+    /**
+     * Picks the delegate for the current call.
+     *
+     * @throws IllegalStateException if no delegate is configured
+     */
+    private CrawlerService<T> nextCrawlerService(){
+        List<CrawlerService<T>> delegates = crawlerServices;
+        if (delegates == null || delegates.isEmpty())
+            throw new IllegalStateException("no crawlerService is configured");
+        // getAndIncrement() wraps to negative values after Integer.MAX_VALUE calls; clearing the sign bit keeps
+        // the remainder, and therefore the list index, non-negative
+        int ticket = counter.getAndIncrement() & Integer.MAX_VALUE;
+        return delegates.get(ticket % delegates.size());
+    }
+}