You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by mi...@apache.org on 2009/09/01 22:19:38 UTC
svn commit: r810279 - in /incubator/droids/trunk/droids-crawler-web: ./ src/
src/main/ src/main/java/ src/main/resources/ src/main/webapp/
src/main/webapp/WEB-INF/ src/test/ src/test/java/ src/test/java/org/
src/test/java/org/apache/ src/test/java/org/...
Author: mingfai
Date: Tue Sep 1 22:19:37 2009
New Revision: 810279
URL: http://svn.apache.org/viewvc?rev=810279&view=rev
Log:
imported the initial crawler web services codebase
Added:
incubator/droids/trunk/droids-crawler-web/README.txt
incubator/droids/trunk/droids-crawler-web/droids-crawler-web.iml
incubator/droids/trunk/droids-crawler-web/pom.xml
incubator/droids/trunk/droids-crawler-web/src/
incubator/droids/trunk/droids-crawler-web/src/main/
incubator/droids/trunk/droids-crawler-web/src/main/java/
incubator/droids/trunk/droids-crawler-web/src/main/resources/
incubator/droids/trunk/droids-crawler-web/src/main/resources/spring-crawler-service.xml
incubator/droids/trunk/droids-crawler-web/src/main/webapp/
incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/
incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/appengine-web.xml
incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/logging.properties
incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/spring-web.xml
incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/web.xml
incubator/droids/trunk/droids-crawler-web/src/test/
incubator/droids/trunk/droids-crawler-web/src/test/java/
incubator/droids/trunk/droids-crawler-web/src/test/java/org/
incubator/droids/trunk/droids-crawler-web/src/test/java/org/apache/
incubator/droids/trunk/droids-crawler-web/src/test/java/org/apache/droids/
incubator/droids/trunk/droids-crawler-web/src/test/java/org/apache/droids/crawler/
incubator/droids/trunk/droids-crawler-web/src/test/java/org/apache/droids/crawler/RemoteCrawlerServiceTest.java
incubator/droids/trunk/droids-crawler-web/src/test/resources/
incubator/droids/trunk/droids-crawler-web/src/test/resources/test-RemoteCrawlerServiceTest.xml
Added: incubator/droids/trunk/droids-crawler-web/README.txt
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/README.txt?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/README.txt (added)
+++ incubator/droids/trunk/droids-crawler-web/README.txt Tue Sep 1 22:19:37 2009
@@ -0,0 +1,11 @@
+This is the web service module for droids-crawler. As a JEE project, it is expected to be deployable to any JEE web
+container. It also supports Google App Engine (and is actually designed for GAE for its unlimited scalability).
+
+Note that the test cases in this module may depend on a deployed web service, i.e. the test cases make HTTP
+connections to the deployed service to perform testing.
+
+For GAE deployment:
+1. Replace YOUR_APP_ID in /src/main/webapp/WEB-INF/appengine-web.xml with your application ID, and execute the
+ corresponding GAE deployment command.
+
+2. Update YOUR_APP_ID and the deployment path in /src/test/resources/test-*.xml before executing the test cases.
\ No newline at end of file
Added: incubator/droids/trunk/droids-crawler-web/droids-crawler-web.iml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/droids-crawler-web.iml?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/droids-crawler-web.iml (added)
+++ incubator/droids/trunk/droids-crawler-web/droids-crawler-web.iml Tue Sep 1 22:19:37 2009
@@ -0,0 +1,225 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module relativePaths="true" MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
+ <component name="FacetManager">
+ <facet type="web" name="Web">
+ <configuration>
+ <descriptors>
+ <deploymentDescriptor name="web.xml" url="file://$MODULE_DIR$/src/main/webapp/WEB-INF/web.xml" />
+ </descriptors>
+ <webroots>
+ <root url="file://$MODULE_DIR$/src/main/webapp" relative="/" />
+ </webroots>
+ <sourceRoots>
+ <root url="file://$MODULE_DIR$/src/main/java" />
+ <root url="file://$MODULE_DIR$/src/main/resources" />
+ <root url="file://$MODULE_DIR$/src/test/java" />
+ </sourceRoots>
+ <building>
+ <setting name="EXPLODED_URL" value="file://$MODULE_DIR$/../out/exploded/droids-crawler-webWeb" />
+ <setting name="EXPLODED_ENABLED" value="true" />
+ <setting name="JAR_URL" value="file://" />
+ <setting name="JAR_ENABLED" value="false" />
+ <setting name="EXCLUDE_EXPLODED_DIRECTORY" value="true" />
+ </building>
+ <packaging>
+ <containerElement type="module" name="droids-crawler-web">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/classes" />
+ </containerElement>
+ <containerElement type="module" name="droids-crawler">
+ <attribute name="method" value="5" />
+ <attribute name="URI" value="/WEB-INF/lib/droids-crawler-0.1.jar" />
+ </containerElement>
+ <containerElement type="module" name="droids-core">
+ <attribute name="method" value="5" />
+ <attribute name="URI" value="/WEB-INF/lib/droids-core-0.1-incubating-SNAPSHOT.jar" />
+ </containerElement>
+ <containerElement type="module" name="droids-norobots">
+ <attribute name="method" value="5" />
+ <attribute name="URI" value="/WEB-INF/lib/droids-norobots-0.1-incubating-SNAPSHOT.jar" />
+ </containerElement>
+ <containerElement type="library" name="Maven: commons-logging:commons-logging:1.1.1" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: org.apache.httpcomponents:httpclient:4.0-beta2" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: org.apache.httpcomponents:httpcore:4.0" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: commons-codec:commons-codec:1.3" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: nekohtml:nekohtml:1.9.6.2" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: xerces:xercesImpl:2.8.1" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: commons-io:commons-io:1.4" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: org.codehaus.groovy:groovy-all:1.6.3" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: junit:junit:3.8.2" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: org.apache.ant:ant:1.7.1" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: org.apache.ant:ant-launcher:1.7.1" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: jline:jline:0.9.94" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: net.sourceforge.nekohtml:nekohtml:1.9.12" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: net.htmlparser.jericho:jericho-html:3.1" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: net.jcip:jcip-annotations:1.0" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: org.springframework:spring-core:3.0.0.M3" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: org.springframework:spring-asm:3.0.0.M3" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: org.springframework:spring-web:3.0.0.M3" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: com.caucho:hessian:3.1.5" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: com.caucho:burlap:2.1.12" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: aopalliance:aopalliance:1.0" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: org.springframework:spring-aop:3.0.0.M3" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: org.springframework:spring-beans:3.0.0.M3" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: org.springframework:spring-context:3.0.0.M3" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: org.springframework:spring-expression:3.0.0.M3" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: org.antlr:antlr:3.0.1" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: org.antlr:stringtemplate:3.1-b1" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: antlr:antlr:2.7.7" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: org.springframework:spring-oxm:3.0.0.M3" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: org.springframework:spring-webmvc:3.0.0.M3" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: org.aspectj:aspectjweaver:1.6.2" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ <containerElement type="library" name="Maven: com.google.appengine:appengine-api-1.0-sdk:1.2.1" level="project">
+ <attribute name="method" value="1" />
+ <attribute name="URI" value="/WEB-INF/lib/" />
+ </containerElement>
+ </packaging>
+ </configuration>
+ <facet type="google-app-engine" name="Google App Engine">
+ <configuration>
+ <!-- NOTE(review): absolute, machine-specific SDK location; it also names SDK 1.2.2 while the library entries in this file reference appengine-api-1.0-sdk 1.2.1. Adjust locally and confirm the intended SDK version. -->
+ <sdk-home-path>C:/java/platform/appengine-java-sdk-1.2.2</sdk-home-path>
+ </configuration>
+ </facet>
+ </facet>
+ </component>
+ <component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_5" inherit-compiler-output="false">
+ <output url="file://$MODULE_DIR$/target/classes" />
+ <output-test url="file://$MODULE_DIR$/target/test-classes" />
+ <content url="file://$MODULE_DIR$">
+ <sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
+ <sourceFolder url="file://$MODULE_DIR$/src/main/resources" isTestSource="false" />
+ <sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
+ <sourceFolder url="file://$MODULE_DIR$/src/test/resources" isTestSource="true" />
+ <excludeFolder url="file://$MODULE_DIR$/target" />
+ </content>
+ <orderEntry type="inheritedJdk" />
+ <orderEntry type="sourceFolder" forTests="false" />
+ <orderEntry type="module" module-name="droids-crawler" exported="" />
+ <orderEntry type="library" exported="" name="Maven: commons-logging:commons-logging:1.1.1" level="project" />
+ <orderEntry type="library" exported="" name="Maven: org.apache.httpcomponents:httpclient:4.0-beta2" level="project" />
+ <orderEntry type="library" exported="" name="Maven: org.apache.httpcomponents:httpcore:4.0" level="project" />
+ <orderEntry type="library" exported="" name="Maven: commons-codec:commons-codec:1.3" level="project" />
+ <orderEntry type="library" exported="" name="Maven: nekohtml:nekohtml:1.9.6.2" level="project" />
+ <orderEntry type="library" exported="" name="Maven: xerces:xercesImpl:2.8.1" level="project" />
+ <orderEntry type="library" exported="" name="Maven: commons-io:commons-io:1.4" level="project" />
+ <orderEntry type="library" exported="" name="Maven: org.codehaus.groovy:groovy-all:1.6.3" level="project" />
+ <orderEntry type="library" exported="" name="Maven: junit:junit:3.8.2" level="project" />
+ <orderEntry type="library" exported="" name="Maven: org.apache.ant:ant:1.7.1" level="project" />
+ <orderEntry type="library" exported="" name="Maven: org.apache.ant:ant-launcher:1.7.1" level="project" />
+ <orderEntry type="library" exported="" name="Maven: jline:jline:0.9.94" level="project" />
+ <orderEntry type="library" exported="" name="Maven: net.sourceforge.nekohtml:nekohtml:1.9.12" level="project" />
+ <orderEntry type="library" exported="" name="Maven: net.htmlparser.jericho:jericho-html:3.1" level="project" />
+ <orderEntry type="library" exported="" name="Maven: net.jcip:jcip-annotations:1.0" level="project" />
+ <orderEntry type="library" exported="" name="Maven: org.springframework:spring-core:3.0.0.M3" level="project" />
+ <orderEntry type="library" exported="" name="Maven: org.springframework:spring-asm:3.0.0.M3" level="project" />
+ <orderEntry type="library" exported="" name="Maven: org.springframework:spring-web:3.0.0.M3" level="project" />
+ <orderEntry type="library" exported="" name="Maven: com.caucho:hessian:3.1.5" level="project" />
+ <orderEntry type="library" exported="" name="Maven: com.caucho:burlap:2.1.12" level="project" />
+ <orderEntry type="library" exported="" name="Maven: aopalliance:aopalliance:1.0" level="project" />
+ <orderEntry type="library" exported="" name="Maven: org.springframework:spring-aop:3.0.0.M3" level="project" />
+ <orderEntry type="library" exported="" name="Maven: org.springframework:spring-beans:3.0.0.M3" level="project" />
+ <orderEntry type="library" exported="" name="Maven: org.springframework:spring-context:3.0.0.M3" level="project" />
+ <orderEntry type="library" exported="" name="Maven: org.springframework:spring-expression:3.0.0.M3" level="project" />
+ <orderEntry type="library" exported="" name="Maven: org.antlr:antlr:3.0.1" level="project" />
+ <orderEntry type="library" exported="" name="Maven: org.antlr:stringtemplate:3.1-b1" level="project" />
+ <orderEntry type="library" exported="" name="Maven: antlr:antlr:2.7.7" level="project" />
+ <orderEntry type="library" exported="" name="Maven: org.springframework:spring-oxm:3.0.0.M3" level="project" />
+ <orderEntry type="library" exported="" name="Maven: org.springframework:spring-webmvc:3.0.0.M3" level="project" />
+ <orderEntry type="library" exported="" name="Maven: org.aspectj:aspectjweaver:1.6.2" level="project" />
+ <orderEntry type="library" exported="" name="Maven: com.google.appengine:appengine-api-1.0-sdk:1.2.1" level="project" />
+ </component>
+</module>
+
Added: incubator/droids/trunk/droids-crawler-web/pom.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/pom.xml?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/pom.xml (added)
+++ incubator/droids/trunk/droids-crawler-web/pom.xml Tue Sep 1 22:19:37 2009
@@ -0,0 +1,87 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+    <!-- The parent declaration is commented out; this module carries its own groupId and version.
+    <parent>
+        <artifactId>droids</artifactId>
+        <groupId>org.apache.droids</groupId>
+        <version>0.1-incubating-SNAPSHOT</version>
+    </parent>
+    -->
+
+    <!-- Coordinates: packaged as a WAR for deployment to a servlet container. -->
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>org.apache.droids</groupId>
+    <artifactId>droids-crawler-web</artifactId>
+    <version>0.1</version>
+    <packaging>war</packaging>
+
+
+    <!-- Additional repository; presumably needed for the 3.0.0.M3 Spring milestone artifacts below (confirm). -->
+    <repositories>
+        <repository>
+            <id>Springframework milestone</id>
+            <url>http://maven.springframework.org/milestone</url>
+        </repository>
+    </repositories>
+
+    <dependencies>
+        <!-- The crawler module exposed by this web application -->
+        <dependency>
+            <groupId>org.apache.droids</groupId>
+            <artifactId>droids-crawler</artifactId>
+            <version>0.1</version>
+            <type>jar</type>
+        </dependency>
+
+        <!-- Spring 3.0 milestone modules (core, web, web MVC, AOP) -->
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-core</artifactId>
+            <version>3.0.0.M3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-web</artifactId>
+            <version>3.0.0.M3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-webmvc</artifactId>
+            <version>3.0.0.M3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-aop</artifactId>
+            <version>3.0.0.M3</version>
+        </dependency>
+
+        <!-- AspectJ weaver -->
+        <dependency>
+            <groupId>org.aspectj</groupId>
+            <artifactId>aspectjweaver</artifactId>
+            <version>1.6.2</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <!-- Compile sources and emit bytecode at the Java 1.5 level. -->
+            <plugin>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <configuration>
+                    <source>1.5</source>
+                    <target>1.5</target>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
Added: incubator/droids/trunk/droids-crawler-web/src/main/resources/spring-crawler-service.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/src/main/resources/spring-crawler-service.xml?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/src/main/resources/spring-crawler-service.xml (added)
+++ incubator/droids/trunk/droids-crawler-web/src/main/resources/spring-crawler-service.xml Tue Sep 1 22:19:37 2009
@@ -0,0 +1,107 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<beans xmlns="http://www.springframework.org/schema/beans"
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+       xmlns:p="http://www.springframework.org/schema/p"
+       xmlns:context="http://www.springframework.org/schema/context"
+       xmlns:aop="http://www.springframework.org/schema/aop"
+       xmlns:util="http://www.springframework.org/schema/util"
+       xsi:schemaLocation="
+           http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-2.5.xsd
+           http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-2.5.xsd
+           http://www.springframework.org/schema/aop http://www.springframework.org/schema/aop/spring-aop.xsd
+           http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util-2.5.xsd">
+
+    <!-- Enable processing of annotations on the beans declared in this context. -->
+    <context:annotation-config/>
+
+    <!-- ===== Filters ===== -->
+
+    <!-- Anonymous IncludeFilter constructed with a one-element list holding the pattern ".*". -->
+    <bean class="org.apache.droids.crawler.filter.extract.IncludeFilter">
+        <constructor-arg index="0">
+            <util:list>
+                <value>.*</value>
+            </util:list>
+        </constructor-arg>
+    </bean>
+
+
+    <bean id="depthFilter" class="org.apache.droids.crawler.filter.DepthFilter">
+        <property name="maxDepth" value="1"/>
+    </bean>
+
+    <!-- Prototype-scoped filters: a new instance is created for every lookup or injection. -->
+    <bean id="httpHeaderFilter"
+          class="org.apache.droids.crawler.filter.fetch.HttpHeaderFilter"
+          scope="prototype"/>
+
+    <bean id="linkAttributeFilter"
+          class="org.apache.droids.crawler.filter.fetch.LinkAttributeFilter"
+          scope="prototype"/>
+
+    <bean id="refererFilter"
+          class="org.apache.droids.crawler.filter.extract.RefererFilter"
+          scope="prototype"/>
+
+
+    <bean id="stateFilter" class="org.apache.droids.crawler.filter.StateFilter"/>
+
+    <!-- ===== Fetcher and parser ===== -->
+
+    <bean id="appEngineFetcher" class="org.apache.droids.crawler.fetcher.appengine.AppEngineFetcher">
+        <!-- The "filters" property is currently disabled:
+        <property name="filters">
+            <util:list>
+
+            </util:list>
+        </property>
+        -->
+    </bean>
+
+    <!-- NekoHTML-based parser; "elements" maps an element name to the attribute names listed for it (A to href). -->
+    <bean id="htmlParser" class="org.apache.droids.crawler.parser.impl.NekoHtmlParser" scope="prototype">
+        <property name="elements">
+            <util:map key-type="java.lang.String" value-type="java.lang.String[]">
+                <entry key="A">
+                    <list>
+                        <value>href</value>
+                    </list>
+                </entry>
+            </util:map>
+        </property>
+    </bean>
+
+    <!-- ===== Crawler service: wires the fetcher factory, parser factory and link extractors ===== -->
+
+    <bean id="crawlerService" class="org.apache.droids.crawler.LocalCrawlerService">
+        <property name="fetcherFactory">
+            <bean class="org.apache.droids.crawler.fetcher.DefaultFetcherFactory">
+                <property name="fetchers">
+                    <util:list>
+                        <ref bean="appEngineFetcher"/>
+                    </util:list>
+                </property>
+            </bean>
+        </property>
+        <property name="parserFactory">
+            <bean class="org.apache.droids.crawler.parser.DefaultParserFactory">
+                <property name="parsers">
+                    <util:list>
+                        <ref bean="htmlParser"/>
+                    </util:list>
+                </property>
+            </bean>
+        </property>
+        <property name="extractors">
+            <util:list>
+                <bean id="htmlElementLinkExtractor"
+                      class="org.apache.droids.crawler.extractor.HtmlElementLinkExtractor"/>
+            </util:list>
+        </property>
+    </bean>
+
+    <!-- Exposes crawlerService through Spring's HTTP invoker; the bean name is the path "/api/crawler". -->
+    <bean name="/api/crawler" class="org.springframework.remoting.httpinvoker.HttpInvokerServiceExporter">
+        <property name="service" ref="crawlerService"/>
+        <property name="serviceInterface" value="org.apache.droids.crawler.CrawlerService"/>
+    </bean>
+
+
+</beans>
\ No newline at end of file
Added: incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/appengine-web.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/appengine-web.xml?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/appengine-web.xml (added)
+++ incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/appengine-web.xml Tue Sep 1 22:19:37 2009
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<appengine-web-app xmlns="http://appengine.google.com/ns/1.0">
+
+    <!-- Placeholder: must be replaced with the real application ID before deployment (see README.txt). -->
+    <application>YOUR_APP_ID</application>
+    <version>1</version>
+    <sessions-enabled>true</sessions-enabled>
+
+    <!-- JVM system properties set for the deployed application. -->
+    <system-properties>
+        <!-- Logging: route commons-logging to the JDK logger and point it at the bundled configuration. -->
+        <property name="org.apache.commons.logging.Log"
+                  value="org.apache.commons.logging.impl.Jdk14Logger"/>
+        <property name="java.util.logging.config.file"
+                  value="WEB-INF/logging.properties"/>
+        <property name="file.encoding"
+                  value="UTF-8"/>
+        <property name="appengine.orm.disable.duplicate.emf.exception"
+                  value="true"/>
+    </system-properties>
+
+    <!-- File patterns declared as static files. -->
+    <static-files>
+        <include path="/**.html"/>
+        <include path="/**.png"/>
+        <include path="/**.gif"/>
+        <include path="/**.jpg"/>
+        <include path="/**.txt"/>
+        <include path="/**.ico"/>
+        <include path="/**.js"/>
+        <include path="/**.css"/>
+    </static-files>
+
+</appengine-web-app>
Added: incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/logging.properties
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/logging.properties?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/logging.properties (added)
+++ incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/logging.properties Tue Sep 1 22:19:37 2009
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Logger levels, one "<logger name>.level" entry per logger namespace. This file is the one named by
+# the java.util.logging.config.file system property in WEB-INF/appengine-web.xml.
+#
+# Crawler loggers are set to the most verbose level (FINEST) ...
+org.apache.droids.crawler.level=FINEST
+# ... while the more specific parser implementation loggers are limited to WARNING.
+org.apache.droids.crawler.parser.impl.level=WARNING
+# Spring framework loggers: WARNING only.
+org.springframework.level=WARNING
+
+# NOTE(review): DataNucleus is presumably the App Engine datastore persistence layer; confirm it is in use.
+DataNucleus.level=WARNING
Added: incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/spring-web.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/spring-web.xml?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/spring-web.xml (added)
+++ incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/spring-web.xml Tue Sep 1 22:19:37 2009
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<beans xmlns="http://www.springframework.org/schema/beans"
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+       xmlns:p="http://www.springframework.org/schema/p"
+       xmlns:context="http://www.springframework.org/schema/context"
+       xmlns:aop="http://www.springframework.org/schema/aop"
+       xsi:schemaLocation="
+           http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-2.5.xsd
+           http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-2.5.xsd
+           http://www.springframework.org/schema/aop http://www.springframework.org/schema/aop/spring-aop.xsd">
+
+
+    <!-- Enable processing of annotations on the beans declared in this context. -->
+    <context:annotation-config/>
+
+
+    <!-- Handler mapping that uses bean names as URL paths (e.g. the "/api/crawler" bean imported below). -->
+    <bean id="urlMapping" class="org.springframework.web.servlet.handler.BeanNameUrlHandlerMapping">
+        <property name="order" value="1"/>
+    </bean>
+
+    <!-- Annotation-based handler mapping, currently disabled:
+    <bean class="org.springframework.web.servlet.mvc.annotation.DefaultAnnotationHandlerMapping">
+        <property name="order" value="2"/>
+        <property name="interceptors">
+            <list>
+                <ref bean="searchInterceptor"/>
+            </list>
+        </property>
+    </bean>
+    -->
+
+    <!-- Crawler service beans and their HTTP invoker exporter, loaded from the classpath. -->
+    <import resource="classpath:/spring-crawler-service.xml"/>
+
+</beans>
\ No newline at end of file
Added: incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/web.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/web.xml?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/web.xml (added)
+++ incubator/droids/trunk/droids-crawler-web/src/main/webapp/WEB-INF/web.xml Tue Sep 1 22:19:37 2009
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<web-app xmlns="http://java.sun.com/xml/ns/javaee" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd"
+         version="2.5">
+
+  <!--
+    Single Spring context: the DispatcherServlet below is the only loader of
+    /WEB-INF/spring-web.xml. Registering a ContextLoaderListener against the
+    same file would build a second, duplicate copy of every bean in a root
+    context. If a shared root context is needed later, give it its own file.
+  -->
+  <servlet>
+    <servlet-name>spring</servlet-name>
+    <servlet-class>org.springframework.web.servlet.DispatcherServlet</servlet-class>
+    <init-param>
+      <param-name>contextConfigLocation</param-name>
+      <param-value>/WEB-INF/spring-web.xml</param-value>
+    </init-param>
+    <load-on-startup>1</load-on-startup>
+  </servlet>
+
+  <!-- "/" makes the Spring servlet the default servlet of this web application. -->
+  <servlet-mapping>
+    <servlet-name>spring</servlet-name>
+    <url-pattern>/</url-pattern>
+  </servlet-mapping>
+
+  <welcome-file-list>
+    <welcome-file>index.jsp</welcome-file>
+    <welcome-file>index.html</welcome-file>
+  </welcome-file-list>
+
+</web-app>
+
Added: incubator/droids/trunk/droids-crawler-web/src/test/java/org/apache/droids/crawler/RemoteCrawlerServiceTest.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/src/test/java/org/apache/droids/crawler/RemoteCrawlerServiceTest.java?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/src/test/java/org/apache/droids/crawler/RemoteCrawlerServiceTest.java (added)
+++ incubator/droids/trunk/droids-crawler-web/src/test/java/org/apache/droids/crawler/RemoteCrawlerServiceTest.java Tue Sep 1 22:19:37 2009
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.crawler;
+
+import org.junit.Test;
+import static org.junit.Assert.*;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertEquals;
+import org.springframework.context.support.ClassPathXmlApplicationContext;
+import org.springframework.context.ApplicationContext;
+import org.apache.droids.crawler.fetcher.FetcherException;
+import org.apache.droids.crawler.fetcher.Fetcher;
+import org.apache.droids.crawler.parser.ParserException;
+import org.apache.droids.crawler.parser.Parser;
+import org.apache.droids.crawler.extractor.ExtractorException;
+
+import java.net.URISyntaxException;
+import java.util.Set;
+import java.util.Map;
+
+/**
+ * Exercises {@link CrawlerService} through the "crawlerService" bean declared in
+ * test-RemoteCrawlerServiceTest.xml.
+ * <p>
+ * This is a direct clone of LocalCrawlerServiceTest, with the context XML changed.
+ * <p>
+ * NOTE(review): the bean is a remote proxy and the tests crawl a live URL, so they
+ * presumably need network access and a real serviceUrl in the context XML - confirm.
+ */
+public class RemoteCrawlerServiceTest{
+  ApplicationContext context = new ClassPathXmlApplicationContext("test-RemoteCrawlerServiceTest.xml");
+  static String url = "http://www.apache.org";
+
+  // Assertions below use the statically imported org.junit.Assert methods; the
+  // class name Assert itself is not imported by this file.
+
+  /** Fetching a link marks it FETCHED and stores a Fetcher with an entity under "fetched". */
+  @Test public void testFetch() throws URISyntaxException, FetcherException{
+    CrawlerService crawlerService = context.getBean("crawlerService", CrawlerService.class);
+    assertNotNull(crawlerService);
+    Link fetchedLink = crawlerService.fetch(new Link(url));
+    assertNotNull(fetchedLink);
+    assertEquals(Link.State.FETCHED, fetchedLink.getState());
+    assertNotNull(fetchedLink.get("fetched"));
+    assertTrue(fetchedLink.get("fetched") instanceof Fetcher);
+    assertNotNull(fetchedLink.get("fetched", Fetcher.class).getEntity());
+  }
+
+  /** Parsing a previously fetched link marks it PARSED and stores a Parser with data under "parsed". */
+  @Test public void testFetchParse() throws URISyntaxException, FetcherException, ParserException{
+    CrawlerService crawlerService = context.getBean("crawlerService", CrawlerService.class);
+    Link fetchedLink = crawlerService.fetch(new Link(url));
+    Link parsedLink = crawlerService.parse(fetchedLink);
+    assertEquals(Link.State.PARSED, parsedLink.getState());
+    assertTrue(parsedLink.get("parsed") instanceof Parser);
+    assertNotNull(parsedLink.get("parsed", Parser.class).getData());
+  }
+
+  /** Calling parse() on a link that was never fetched still yields a PARSED link with one data entry. */
+  @Test public void testParseWithoutFetch() throws URISyntaxException, FetcherException, ParserException{
+    CrawlerService crawlerService = context.getBean("crawlerService", CrawlerService.class);
+    Link parsedLink = crawlerService.parse(new Link(url));
+    assertEquals(Link.State.PARSED, parsedLink.getState());
+    assertTrue(parsedLink.get("parsed") instanceof Parser);
+    Map<String, Map<String, Set<String>>> data = (Map<String, Map<String, Set<String>>>) parsedLink.get("parsed", Parser.class).getData();
+    assertNotNull(data);
+    assertEquals(1, data.size());
+    //assertEquals(3, data.size());
+  }
+
+  /** Calling extract() directly yields an EXTRACTED link with a Set under "extracted" and no "parsed" entry. */
+  @Test public void testExtractWithoutFetchAndParse() throws ExtractorException, URISyntaxException{
+    CrawlerService crawlerService = context.getBean("crawlerService", CrawlerService.class);
+    Link extractedLink = crawlerService.extract(new Link(url));
+    assertFalse(extractedLink.containsKey("parsed"));
+    assertEquals(Link.State.EXTRACTED, extractedLink.getState());
+    assertTrue(extractedLink.containsKey("extracted"));
+    assertTrue(extractedLink.get("extracted") instanceof Set);
+    Set<Link> outlinks = extractedLink.get("extracted", Set.class);
+    // Depends on the live page content: expects more than 100 extracted entries.
+    assertTrue(outlinks.size() > 100);
+  }
+}
Added: incubator/droids/trunk/droids-crawler-web/src/test/resources/test-RemoteCrawlerServiceTest.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler-web/src/test/resources/test-RemoteCrawlerServiceTest.xml?rev=810279&view=auto
==============================================================================
--- incubator/droids/trunk/droids-crawler-web/src/test/resources/test-RemoteCrawlerServiceTest.xml (added)
+++ incubator/droids/trunk/droids-crawler-web/src/test/resources/test-RemoteCrawlerServiceTest.xml Tue Sep 1 22:19:37 2009
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<beans xmlns="http://www.springframework.org/schema/beans"
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+       xmlns:p="http://www.springframework.org/schema/p"
+       xmlns:context="http://www.springframework.org/schema/context"
+       xmlns:aop="http://www.springframework.org/schema/aop"
+       xmlns:util="http://www.springframework.org/schema/util"
+       xsi:schemaLocation="
+         http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-2.5.xsd
+         http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-2.5.xsd
+         http://www.springframework.org/schema/aop http://www.springframework.org/schema/aop/spring-aop-2.5.xsd
+         http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util-2.5.xsd">
+
+  <context:annotation-config/>
+  <!--<context:component-scan base-package="org.apache.droids.crawler"/>-->
+
+  <!-- Remote CrawlerService proxy; replace the YOUR_APP_ID / YOUR_API_PATH placeholders with the deployed endpoint. -->
+  <bean id="crawlerService" class="org.springframework.remoting.httpinvoker.HttpInvokerProxyFactoryBean">
+    <property name="serviceUrl" value="http://YOUR_APP_ID.appspot.com/YOUR_API_PATH"/>
+    <property name="serviceInterface" value="org.apache.droids.crawler.CrawlerService"/>
+  </bean>
+</beans>
\ No newline at end of file