You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by mi...@apache.org on 2009/09/01 22:19:38 UTC

svn commit: r810279 - in /incubator/droids/trunk/droids-crawler-web: ./ src/ src/main/ src/main/java/ src/main/resources/ src/main/webapp/ src/main/webapp/WEB-INF/ src/test/ src/test/java/ src/test/java/org/ src/test/java/org/apache/ src/test/java/org/...

Author: mingfai
Date: Tue Sep  1 22:19:37 2009
New Revision: 810279

URL: http://svn.apache.org/viewvc?rev=810279&view=rev
Log:
imported the initial crawler web services codebase

Added:
    incubator/droids/trunk/droids-crawler-web/README.txt
    incubator/droids/trunk/droids-crawler-web/droids-crawler-web.iml
    incubator/droids/trunk/droids-crawler-web/pom.xml
    incubator/droids/trunk/droids-crawler-web/src/
    incubator/droids/trunk/droids-crawler-web/src/main/
    incubator/droids/trunk/droids-crawler-web/src/main/java/
    incubator/droids/trunk/droids-crawler-web/src/main/resources/
    incubator/droids/trunk/droids-crawler-web/src/main/resources/spring-crawler-service.xml
    incubator/droids/trunk/droids-crawler-web/src/main/webapp/
    incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/
    incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/appengine-web.xml
    incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/logging.properties
    incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/spring-web.xml
    incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/web.xml
    incubator/droids/trunk/droids-crawler-web/src/test/
    incubator/droids/trunk/droids-crawler-web/src/test/java/
    incubator/droids/trunk/droids-crawler-web/src/test/java/org/
    incubator/droids/trunk/droids-crawler-web/src/test/java/org/apache/
    incubator/droids/trunk/droids-crawler-web/src/test/java/org/apache/droids/
    incubator/droids/trunk/droids-crawler-web/src/test/java/org/apache/droids/crawler/
    incubator/droids/trunk/droids-crawler-web/src/test/java/org/apache/droids/crawler/RemoteCrawlerServiceTest.java
    incubator/droids/trunk/droids-crawler-web/src/test/resources/
    incubator/droids/trunk/droids-crawler-web/src/test/resources/test-RemoteCrawlerServiceTest.xml

Added: incubator/droids/trunk/droids-crawler-web/README.txt
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/README.txt?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/README.txt (added)
+++ incubator/droids/trunk/droids-crawler-web/README.txt Tue Sep  1 22:19:37 2009
@@ -0,0 +1,11 @@
+This is the web service module for droids-crawler. As a JEE project, it is expected to be deployable to any JEE web
+container. It also supports Google App Engine (and is actually designed for GAE for its unlimited scalability).
+
+Note that the test cases in this module may depend on a deployed web service, i.e. the test cases make HTTP
+connections to the deployed service to perform testing.
+
+For GAE deployment:
+1. Update YOUR_APP_ID in /src/main/webapp/WEB-INF/appengine-web.xml, and execute the corresponding GAE deployment
+   command.
+
+2. Update YOUR_APP_ID and the deployment path in /src/test/resources/test-*.xml before executing the test cases.
\ No newline at end of file

Added: incubator/droids/trunk/droids-crawler-web/droids-crawler-web.iml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/droids-crawler-web.iml?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/droids-crawler-web.iml (added)
+++ incubator/droids/trunk/droids-crawler-web/droids-crawler-web.iml Tue Sep  1 22:19:37 2009
@@ -0,0 +1,225 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module relativePaths="true" MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
+  <component name="FacetManager">
+    <facet type="web" name="Web">
+      <configuration>
+        <descriptors>
+          <deploymentDescriptor name="web.xml" url="file://$MODULE_DIR$/src/main/webapp/WEB-INF/web.xml" />
+        </descriptors>
+        <webroots>
+          <root url="file://$MODULE_DIR$/src/main/webapp" relative="/" />
+        </webroots>
+        <sourceRoots>
+          <root url="file://$MODULE_DIR$/src/main/java" />
+          <root url="file://$MODULE_DIR$/src/main/resources" />
+          <root url="file://$MODULE_DIR$/src/test/java" />
+        </sourceRoots>
+        <building>
+          <setting name="EXPLODED_URL" value="file://$MODULE_DIR$/../out/exploded/droids-crawler-webWeb" />
+          <setting name="EXPLODED_ENABLED" value="true" />
+          <setting name="JAR_URL" value="file://" />
+          <setting name="JAR_ENABLED" value="false" />
+          <setting name="EXCLUDE_EXPLODED_DIRECTORY" value="true" />
+        </building>
+        <packaging>
+          <containerElement type="module" name="droids-crawler-web">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/classes" />
+          </containerElement>
+          <containerElement type="module" name="droids-crawler">
+            <attribute name="method" value="5" />
+            <attribute name="URI" value="/WEB-INF/lib/droids-crawler-0.1.jar" />
+          </containerElement>
+          <containerElement type="module" name="droids-core">
+            <attribute name="method" value="5" />
+            <attribute name="URI" value="/WEB-INF/lib/droids-core-0.1-incubating-SNAPSHOT.jar" />
+          </containerElement>
+          <containerElement type="module" name="droids-norobots">
+            <attribute name="method" value="5" />
+            <attribute name="URI" value="/WEB-INF/lib/droids-norobots-0.1-incubating-SNAPSHOT.jar" />
+          </containerElement>
+          <containerElement type="library" name="Maven: commons-logging:commons-logging:1.1.1" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: org.apache.httpcomponents:httpclient:4.0-beta2" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: org.apache.httpcomponents:httpcore:4.0" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: commons-codec:commons-codec:1.3" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: nekohtml:nekohtml:1.9.6.2" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: xerces:xercesImpl:2.8.1" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: commons-io:commons-io:1.4" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: org.codehaus.groovy:groovy-all:1.6.3" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: junit:junit:3.8.2" level="project"> <!-- NOTE(review): this entry targets /WEB-INF/lib/ like the runtime libraries; JUnit is presumably test-only, so verify it should be packaged with the web application -->
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: org.apache.ant:ant:1.7.1" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: org.apache.ant:ant-launcher:1.7.1" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: jline:jline:0.9.94" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: net.sourceforge.nekohtml:nekohtml:1.9.12" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: net.htmlparser.jericho:jericho-html:3.1" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: net.jcip:jcip-annotations:1.0" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: org.springframework:spring-core:3.0.0.M3" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: org.springframework:spring-asm:3.0.0.M3" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: org.springframework:spring-web:3.0.0.M3" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: com.caucho:hessian:3.1.5" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: com.caucho:burlap:2.1.12" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: aopalliance:aopalliance:1.0" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: org.springframework:spring-aop:3.0.0.M3" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: org.springframework:spring-beans:3.0.0.M3" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: org.springframework:spring-context:3.0.0.M3" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: org.springframework:spring-expression:3.0.0.M3" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: org.antlr:antlr:3.0.1" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: org.antlr:stringtemplate:3.1-b1" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: antlr:antlr:2.7.7" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: org.springframework:spring-oxm:3.0.0.M3" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: org.springframework:spring-webmvc:3.0.0.M3" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: org.aspectj:aspectjweaver:1.6.2" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+          <containerElement type="library" name="Maven: com.google.appengine:appengine-api-1.0-sdk:1.2.1" level="project">
+            <attribute name="method" value="1" />
+            <attribute name="URI" value="/WEB-INF/lib/" />
+          </containerElement>
+        </packaging>
+      </configuration>
+      <facet type="google-app-engine" name="Google App Engine"> <!-- IDE facet that points this module at a locally installed App Engine SDK -->
+        <configuration>
+          <sdk-home-path>C:/java/platform/appengine-java-sdk-1.2.2</sdk-home-path> <!-- NOTE(review): absolute, machine-specific path; the SDK directory says 1.2.2 while the appengine-api library entries in this file say 1.2.1 - confirm which is intended -->
+        </configuration>
+      </facet>
+    </facet>
+  </component>
+  <component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_5" inherit-compiler-output="false">
+    <output url="file://$MODULE_DIR$/target/classes" />
+    <output-test url="file://$MODULE_DIR$/target/test-classes" />
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/main/resources" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
+      <sourceFolder url="file://$MODULE_DIR$/src/test/resources" isTestSource="true" />
+      <excludeFolder url="file://$MODULE_DIR$/target" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+    <orderEntry type="module" module-name="droids-crawler" exported="" />
+    <orderEntry type="library" exported="" name="Maven: commons-logging:commons-logging:1.1.1" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.apache.httpcomponents:httpclient:4.0-beta2" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.apache.httpcomponents:httpcore:4.0" level="project" />
+    <orderEntry type="library" exported="" name="Maven: commons-codec:commons-codec:1.3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: nekohtml:nekohtml:1.9.6.2" level="project" />
+    <orderEntry type="library" exported="" name="Maven: xerces:xercesImpl:2.8.1" level="project" />
+    <orderEntry type="library" exported="" name="Maven: commons-io:commons-io:1.4" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.codehaus.groovy:groovy-all:1.6.3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: junit:junit:3.8.2" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.apache.ant:ant:1.7.1" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.apache.ant:ant-launcher:1.7.1" level="project" />
+    <orderEntry type="library" exported="" name="Maven: jline:jline:0.9.94" level="project" />
+    <orderEntry type="library" exported="" name="Maven: net.sourceforge.nekohtml:nekohtml:1.9.12" level="project" />
+    <orderEntry type="library" exported="" name="Maven: net.htmlparser.jericho:jericho-html:3.1" level="project" />
+    <orderEntry type="library" exported="" name="Maven: net.jcip:jcip-annotations:1.0" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-core:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-asm:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-web:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: com.caucho:hessian:3.1.5" level="project" />
+    <orderEntry type="library" exported="" name="Maven: com.caucho:burlap:2.1.12" level="project" />
+    <orderEntry type="library" exported="" name="Maven: aopalliance:aopalliance:1.0" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-aop:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-beans:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-context:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-expression:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.antlr:antlr:3.0.1" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.antlr:stringtemplate:3.1-b1" level="project" />
+    <orderEntry type="library" exported="" name="Maven: antlr:antlr:2.7.7" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-oxm:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.springframework:spring-webmvc:3.0.0.M3" level="project" />
+    <orderEntry type="library" exported="" name="Maven: org.aspectj:aspectjweaver:1.6.2" level="project" />
+    <orderEntry type="library" exported="" name="Maven: com.google.appengine:appengine-api-1.0-sdk:1.2.1" level="project" />
+  </component>
+</module>
+

Added: incubator/droids/trunk/droids-crawler-web/pom.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/pom.xml?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/pom.xml (added)
+++ incubator/droids/trunk/droids-crawler-web/pom.xml Tue Sep  1 22:19:37 2009
@@ -0,0 +1,87 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <!--<parent>
+        <artifactId>droids</artifactId>
+        <groupId>org.apache.droids</groupId>
+        <version>0.1-incubating-SNAPSHOT</version>
+    </parent>-->
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>org.apache.droids</groupId>
+    <artifactId>droids-crawler-web</artifactId>
+    <version>0.1</version>
+    <packaging>war</packaging>
+
+
+    <repositories> <!-- Additional artifact repository; presumably the source of the 3.0.0.M3 Spring milestone artifacts declared under dependencies -->
+        <repository>
+            <id>Springframework milestone</id> <!-- NOTE(review): the id contains a space; confirm this is acceptable wherever the id is matched (e.g. settings.xml mirrors or servers) -->
+            <url>http://maven.springframework.org/milestone</url> <!-- NOTE(review): plain-HTTP repository URL; verify it is still reachable and permitted by the Maven version in use -->
+        </repository>
+    </repositories>
+
+    <dependencies> <!-- Direct dependencies of the WAR; no entry sets a scope, so Maven's default (compile) scope applies to all of them -->
+        <!-- Sibling Droids module; presumably provides the org.apache.droids.crawler classes wired in spring-crawler-service.xml -->
+        <dependency>
+            <groupId>org.apache.droids</groupId>
+            <artifactId>droids-crawler</artifactId>
+            <version>0.1</version>
+            <type>jar</type> <!-- jar is also Maven's default dependency type -->
+        </dependency>
+
+    <!-- Spring 3.0 milestone modules; all four entries below use the same version, 3.0.0.M3 -->
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-core</artifactId>
+            <version>3.0.0.M3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-web</artifactId>
+            <version>3.0.0.M3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-webmvc</artifactId>
+            <version>3.0.0.M3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-aop</artifactId>
+            <version>3.0.0.M3</version>
+        </dependency>
+        <dependency> <!-- NOTE(review): presumably needed for Spring AOP pointcut support; spring-crawler-service.xml declares the aop namespace but uses no aop elements - confirm this dependency is required -->
+            <groupId>org.aspectj</groupId>
+            <artifactId>aspectjweaver</artifactId>
+            <version>1.6.2</version>
+        </dependency>
+    </dependencies>
+    <build> <!-- Build customisation: only the Java source/target level of the compiler plugin is configured -->
+        <plugins>
+            <plugin>
+                <artifactId>maven-compiler-plugin</artifactId> <!-- NOTE(review): no version is pinned for this plugin, so the version used is whatever Maven resolves; consider pinning one for reproducible builds -->
+                <configuration>
+                    <source>1.5</source> <!-- Java 5 language level, consistent with LANGUAGE_LEVEL="JDK_1_5" in droids-crawler-web.iml -->
+                    <target>1.5</target> <!-- emit Java 5 compatible class files -->
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file

Added: incubator/droids/trunk/droids-crawler-web/src/main/resources/spring-crawler-service.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/src/main/resources/spring-crawler-service.xml?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/src/main/resources/spring-crawler-service.xml (added)
+++ incubator/droids/trunk/droids-crawler-web/src/main/resources/spring-crawler-service.xml Tue Sep  1 22:19:37 2009
@@ -0,0 +1,107 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<beans xmlns="http://www.springframework.org/schema/beans"
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+       xmlns:p="http://www.springframework.org/schema/p"
+       xmlns:context="http://www.springframework.org/schema/context"
+       xmlns:aop="http://www.springframework.org/schema/aop"
+       xmlns:util="http://www.springframework.org/schema/util"
+       xsi:schemaLocation="http://www.springframework.org/schema/beans
+        http://www.springframework.org/schema/beans/spring-beans-2.5.xsd
+        http://www.springframework.org/schema/context
+        http://www.springframework.org/schema/context/spring-context-2.5.xsd http://www.springframework.org/schema/aop http://www.springframework.org/schema/aop/spring-aop.xsd
+        http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util-2.5.xsd">
+
+    <context:annotation-config/>
+
+    <bean class="org.apache.droids.crawler.filter.extract.IncludeFilter"> <!-- Anonymous IncludeFilter bean, constructed from a one-element list; it has no id, so nothing in this file can reference it by name - TODO confirm who consumes it (possibly injection by type) -->
+        <constructor-arg index="0">
+            <util:list>
+                <value>.*</value> <!-- presumably a regular expression that accepts everything - confirm against IncludeFilter -->
+            </util:list>
+        </constructor-arg>
+    </bean>
+
+
+    <bean id="depthFilter" class="org.apache.droids.crawler.filter.DepthFilter"> <!-- Filter beans: none of the four beans in this group is referenced by id elsewhere in this file - presumably they are consumed by type or from other configuration, TODO confirm -->
+        <property name="maxDepth" value="1"/> <!-- presumably limits crawling to one level of links - verify the exact semantics in DepthFilter -->
+    </bean>
+
+    <bean id="httpHeaderFilter" class="org.apache.droids.crawler.filter.fetch.HttpHeaderFilter" scope="prototype"/> <!-- prototype scope: the container creates a new instance for every lookup or injection -->
+
+    <bean id="linkAttributeFilter" class="org.apache.droids.crawler.filter.fetch.LinkAttributeFilter"
+          scope="prototype"/>
+
+    <bean id="refererFilter" class="org.apache.droids.crawler.filter.extract.RefererFilter" scope="prototype"/>
+
+
+    <bean id="stateFilter" class="org.apache.droids.crawler.filter.StateFilter"></bean> <!-- default-scoped (singleton) filter bean with no properties set; not referenced by id in this file -->
+    <bean id="appEngineFetcher" class="org.apache.droids.crawler.fetcher.appengine.AppEngineFetcher"> <!-- the only fetcher handed to the fetcherFactory of crawlerService in this file -->
+        <!--    <property name="filters">
+            <util:list>
+
+            </util:list>
+        </property>--> <!-- NOTE(review): the commented-out block above would set an empty filters list; as written, no filters are configured on the fetcher here -->
+    </bean>
+
+    <bean id="htmlParser" class="org.apache.droids.crawler.parser.impl.NekoHtmlParser" scope="prototype"> <!-- HTML parser bean; prototype scope means a new instance is created for every lookup or injection -->
+        <property name="elements">
+            <util:map key-type="java.lang.String" value-type="java.lang.String[]"> <!-- String keys mapped to String[] values; presumably element name to the attribute names of interest - confirm against NekoHtmlParser -->
+                <entry key="A">
+                    <list>
+                        <value>href</value> <!-- the single configured pair is A / href, presumably so that anchor links are picked up -->
+                    </list>
+                </entry>
+            </util:map>
+        </property>
+    </bean>
+
+    <bean id="crawlerService" class="org.apache.droids.crawler.LocalCrawlerService"> <!-- The crawler service itself (default singleton scope), assembled from a fetcher factory, a parser factory and a list of extractors -->
+        <property name="fetcherFactory">
+            <bean class="org.apache.droids.crawler.fetcher.DefaultFetcherFactory"> <!-- inner bean: only visible to this property -->
+                <property name="fetchers">
+                    <util:list>
+                        <ref bean="appEngineFetcher"/>
+                    </util:list>
+                </property>
+            </bean>
+        </property>
+        <property name="parserFactory">
+            <bean class="org.apache.droids.crawler.parser.DefaultParserFactory"> <!-- inner bean: only visible to this property -->
+                <property name="parsers">
+                    <util:list>
+                        <ref bean="htmlParser"/> <!-- NOTE(review): htmlParser is declared with scope="prototype", but this reference is resolved once while the singleton crawlerService is built, so the factory holds a single parser instance - confirm that is intended -->
+                    </util:list>
+                </property>
+            </bean>
+        </property>
+        <property name="extractors">
+            <util:list>
+                <bean id="htmlElementLinkExtractor"
+                      class="org.apache.droids.crawler.extractor.HtmlElementLinkExtractor"/> <!-- NOTE(review): Spring does not use the id of an inner bean as an identifier, so this id cannot be referenced from elsewhere -->
+            </util:list>
+        </property>
+    </bean>
+
+    <bean name="/api/crawler" class="org.springframework.remoting.httpinvoker.HttpInvokerServiceExporter"> <!-- Exposes crawlerService through Spring HTTP invoker; the bean name doubles as the URL path when resolved by a BeanNameUrlHandlerMapping (one is declared in spring-web.xml) -->
+        <property name="service" ref="crawlerService"/> <!-- the local bean that actually handles the calls -->
+        <property name="serviceInterface" value="org.apache.droids.crawler.CrawlerService"/> <!-- the interface remote clients invoke -->
+    </bean> <!-- NOTE(review): HTTP invoker transports Java-serialized objects and no authentication is configured in this file - confirm that exposing this endpoint is intended -->
+
+
+</beans>
\ No newline at end of file

Added: incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/appengine-web.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/appengine-web.xml?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/appengine-web.xml (added)
+++ incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/appengine-web.xml Tue Sep  1 22:19:37 2009
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<appengine-web-app xmlns="http://appengine.google.com/ns/1.0"> <!-- Google App Engine deployment descriptor for this web application -->
+    <application>YOUR_APP_ID</application> <!-- placeholder: replace with the real application id before deploying (README.txt describes this step) -->
+    <version>1</version>
+    <sessions-enabled>true</sessions-enabled> <!-- turns on HTTP session support, which App Engine leaves disabled unless requested -->
+    <system-properties>
+        <property name="org.apache.commons.logging.Log" value="org.apache.commons.logging.impl.Jdk14Logger"/> <!-- routes commons-logging output to java.util.logging -->
+        <property name="java.util.logging.config.file" value="WEB-INF/logging.properties"/> <!-- java.util.logging configuration; a logging.properties is added under WEB-INF in this same commit -->
+        <property name="file.encoding" value="UTF-8"/>
+        <property name="appengine.orm.disable.duplicate.emf.exception" value="true"/> <!-- NOTE(review): ORM-related switch; no ORM dependency is declared directly in pom.xml - confirm it is needed -->
+    </system-properties>
+    <static-files> <!-- file patterns that App Engine serves as static content -->
+        <include path="/**.html"/>
+        <include path="/**.png"/>
+        <include path="/**.gif"/>
+        <include path="/**.jpg"/>
+        <include path="/**.txt"/>
+        <include path="/**.ico"/>
+        <include path="/**.js"/>
+        <include path="/**.css"/>
+    </static-files>
+
+</appengine-web-app>

Added: incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/logging.properties
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/logging.properties?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/logging.properties (added)
+++ incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/logging.properties Tue Sep  1 22:19:37 2009
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+org.apache.droids.crawler.level=FINEST
+org.apache.droids.crawler.parser.impl.level=WARNING
+org.springframework.level=WARNING
+
+DataNucleus.level=WARNING

Added: incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/spring-web.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/spring-web.xml?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/spring-web.xml (added)
+++ incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/spring-web.xml Tue Sep  1 22:19:37 2009
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<beans xmlns="http://www.springframework.org/schema/beans"
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+       xmlns:p="http://www.springframework.org/schema/p"
+       xmlns:context="http://www.springframework.org/schema/context"
+       xmlns:aop="http://www.springframework.org/schema/aop"
+       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-2.5.xsd
+        http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-2.5.xsd
+        http://www.springframework.org/schema/aop http://www.springframework.org/schema/aop/spring-aop-2.5.xsd">
+
+    <!-- Enables processing of configuration annotations on the beans registered in this context. -->
+    <context:annotation-config/>
+
+    <!-- Resolves request handlers by bean name; order=1 ranks it ahead of any mapping with a larger order value. -->
+    <bean id="urlMapping" class="org.springframework.web.servlet.handler.BeanNameUrlHandlerMapping">
+        <property name="order" value="1"/>
+    </bean>
+
+    <!-- Disabled annotation-based mapping; it references a searchInterceptor bean that is not declared in this file. -->
+    <!--<bean class="org.springframework.web.servlet.mvc.annotation.DefaultAnnotationHandlerMapping">
+        <property name="order" value="2"/>
+        <property name="interceptors">
+            <list>
+                <ref bean="searchInterceptor"/>
+            </list>
+        </property>
+    </bean>-->
+    <import resource="classpath:/spring-crawler-service.xml"/>
+
+</beans>
\ No newline at end of file

Added: incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/web.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/web.xml?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/web.xml (added)
+++ incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/web.xml Tue Sep  1 22:19:37 2009
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<web-app xmlns="http://java.sun.com/xml/ns/javaee" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd"
+         version="2.5">
+    <context-param>
+        <param-name>contextConfigLocation</param-name>
+        <param-value>/WEB-INF/spring-web.xml</param-value>
+    </context-param>
+    <!-- Front controller. Its own context is left empty so spring-web.xml is loaded once (root context), not twice. -->
+    <servlet>
+        <servlet-name>spring</servlet-name>
+        <servlet-class>org.springframework.web.servlet.DispatcherServlet</servlet-class>
+        <init-param>
+            <param-name>contextConfigLocation</param-name>
+            <param-value></param-value>
+        </init-param>
+        <load-on-startup>1</load-on-startup>
+    </servlet>
+
+    <servlet-mapping>
+        <servlet-name>spring</servlet-name>
+        <url-pattern>/</url-pattern>
+    </servlet-mapping>
+
+    <welcome-file-list>
+        <welcome-file>index.jsp</welcome-file>
+        <welcome-file>index.html</welcome-file>
+    </welcome-file-list>
+    <!-- Builds the root application context from the contextConfigLocation context-param declared above. -->
+    <listener>
+        <listener-class>org.springframework.web.context.ContextLoaderListener</listener-class>
+    </listener>
+</web-app>
+

Added: incubator/droids/trunk/droids-crawler-web/src/test/java/org/apache/droids/crawler/RemoteCrawlerServiceTest.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/src/test/java/org/apache/droids/crawler/RemoteCrawlerServiceTest.java?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/src/test/java/org/apache/droids/crawler/RemoteCrawlerServiceTest.java (added)
+++ incubator/droids/trunk/droids-crawler-web/src/test/java/org/apache/droids/crawler/RemoteCrawlerServiceTest.java Tue Sep  1 22:19:37 2009
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.crawler;
+
+import org.junit.Test;
+import static org.junit.Assert.*;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertEquals;
+import org.springframework.context.support.ClassPathXmlApplicationContext;
+import org.springframework.context.ApplicationContext;
+import org.apache.droids.crawler.fetcher.FetcherException;
+import org.apache.droids.crawler.fetcher.Fetcher;
+import org.apache.droids.crawler.parser.ParserException;
+import org.apache.droids.crawler.parser.Parser;
+import org.apache.droids.crawler.extractor.ExtractorException;
+
+import java.net.URISyntaxException;
+import java.util.Set;
+import java.util.Map;
+
+/** Remote counterpart of LocalCrawlerServiceTest: same checks, run against the bean wired by test-RemoteCrawlerServiceTest.xml. */
+public class RemoteCrawlerServiceTest{
+    ApplicationContext context = new ClassPathXmlApplicationContext("test-RemoteCrawlerServiceTest.xml");
+    static String url = "http://www.apache.org";
+
+    /** fetch() must return a FETCHED link whose "fetched" entry is a Fetcher with a non-null entity. */
+    @Test public void testFetch() throws URISyntaxException, FetcherException{
+        CrawlerService crawlerService = context.getBean("crawlerService", CrawlerService.class);
+        assertNotNull(crawlerService);
+        Link fetchedLink = crawlerService.fetch(new Link(url));
+        assertNotNull(fetchedLink);
+        assertEquals(Link.State.FETCHED, fetchedLink.getState());
+        assertNotNull(fetchedLink.get("fetched"));
+        assertTrue(fetchedLink.get("fetched") instanceof Fetcher);
+        assertNotNull(fetchedLink.get("fetched", Fetcher.class).getEntity());
+    }
+
+    /** parse() applied to an already fetched link must yield a PARSED link carrying a Parser with non-null data. */
+    @Test public void testFetchParse() throws URISyntaxException, FetcherException, ParserException{
+        CrawlerService crawlerService = context.getBean("crawlerService", CrawlerService.class);
+        Link fetchedLink = crawlerService.fetch(new Link(url));
+        Link parsedLink = crawlerService.parse(fetchedLink);
+        assertEquals(Link.State.PARSED, parsedLink.getState());
+        assertTrue(parsedLink.get("parsed") instanceof Parser);
+        assertNotNull(parsedLink.get("parsed", Parser.class).getData());
+    }
+
+    /** parse() on a link that was never fetched must still reach PARSED; exactly one data entry is expected. */
+    @Test public void testParseWithoutFetch() throws URISyntaxException, FetcherException, ParserException{
+        CrawlerService crawlerService = context.getBean("crawlerService", CrawlerService.class);
+        Link parsedLink = crawlerService.parse(new Link(url));
+        assertEquals(Link.State.PARSED, parsedLink.getState());
+        assertTrue(parsedLink.get("parsed") instanceof Parser);
+        Map<String, Map<String, Set<String>>> data = (Map<String, Map<String, Set<String>>>) parsedLink.get("parsed", Parser.class).getData();
+        assertNotNull(data);
+        assertEquals(1, data.size());
+        //assertEquals(3, data.size());
+    }
+
+    /** extract() on a fresh link must reach EXTRACTED without a "parsed" entry and expose its outlinks as a Set. */
+    @Test public void testExtractWithoutFetchAndParse() throws ExtractorException, URISyntaxException{
+        CrawlerService crawlerService = context.getBean("crawlerService", CrawlerService.class);
+        Link extractedLink = crawlerService.extract(new Link(url));
+        assertFalse(extractedLink.containsKey("parsed"));
+        assertEquals(Link.State.EXTRACTED, extractedLink.getState());
+        assertTrue(extractedLink.containsKey("extracted"));
+        assertTrue(extractedLink.get("extracted") instanceof Set);
+        Set<Link> outlinks = extractedLink.get("extracted", Set.class);
+        assertTrue(outlinks.size() > 100);
+    }
+}

Added: incubator/droids/trunk/droids-crawler-web/src/test/resources/test-RemoteCrawlerServiceTest.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/src/test/resources/test-RemoteCrawlerServiceTest.xml?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/src/test/resources/test-RemoteCrawlerServiceTest.xml (added)
+++ incubator/droids/trunk/droids-crawler-web/src/test/resources/test-RemoteCrawlerServiceTest.xml Tue Sep  1 22:19:37 2009
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<beans xmlns="http://www.springframework.org/schema/beans"
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+       xmlns:p="http://www.springframework.org/schema/p"
+       xmlns:context="http://www.springframework.org/schema/context"
+       xmlns:aop="http://www.springframework.org/schema/aop"
+       xmlns:util="http://www.springframework.org/schema/util"
+       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-2.5.xsd
+        http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-2.5.xsd
+        http://www.springframework.org/schema/aop http://www.springframework.org/schema/aop/spring-aop-2.5.xsd
+        http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util-2.5.xsd">
+
+    <context:annotation-config/>
+    <!--<context:component-scan base-package="org.apache.droids.crawler"/>-->
+
+    <!-- HTTP invoker client proxy for CrawlerService; the serviceUrl is a placeholder and must be set to a deployed endpoint. -->
+    <bean id="crawlerService" class="org.springframework.remoting.httpinvoker.HttpInvokerProxyFactoryBean">
+        <property name="serviceUrl" value="http://YOUR_APP_ID.appspot.com/YOUR_API_PATH"/>
+        <property name="serviceInterface" value="org.apache.droids.crawler.CrawlerService"/>
+    </bean>
+
+</beans>
\ No newline at end of file