You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2012/12/08 11:42:56 UTC

svn commit: r1418639 - in /manifoldcf/trunk: ./ tests/ tests/rss/src/test/java/org/apache/manifoldcf/rss_tests/ tests/webcrawler/

Author: kwright
Date: Sat Dec  8 10:42:55 2012
New Revision: 1418639

URL: http://svn.apache.org/viewvc?rev=1418639&view=rev
Log:
Add a simple RSS integration test.

Added:
    manifoldcf/trunk/tests/rss/src/test/java/org/apache/manifoldcf/rss_tests/RSSSimpleCrawlDerbyIT.java
    manifoldcf/trunk/tests/rss/src/test/java/org/apache/manifoldcf/rss_tests/RSSSimpleCrawlTester.java   (with props)
    manifoldcf/trunk/tests/webcrawler/pom.xml   (with props)
Modified:
    manifoldcf/trunk/build.xml
    manifoldcf/trunk/tests/pom.xml

Modified: manifoldcf/trunk/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/build.xml?rev=1418639&r1=1418638&r2=1418639&view=diff
==============================================================================
--- manifoldcf/trunk/build.xml (original)
+++ manifoldcf/trunk/build.xml Sat Dec  8 10:42:55 2012
@@ -2282,6 +2282,10 @@
         <ant dir="tests/rss" target="run-load-HSQLDB"/>
     </target>
 
+    <target name="run-rss-tests-derby" depends="build-tests-framework,build-tests-rss-connector,build-tests-nulloutput-connector,calculate-rss-tests-condition" if="rss-tests.include">
+        <ant dir="tests/rss" target="run-derby"/>
+    </target>
+
     <target name="run-webcrawler-tests-derby" depends="build-tests-framework,build-tests-webcrawler-connector,build-tests-nulloutput-connector,calculate-webcrawler-tests-condition" if="webcrawler-tests.include">
         <ant dir="tests/webcrawler" target="run-derby"/>
     </target>
@@ -2478,7 +2482,7 @@
     <target name="run-tests-derby-lgpl-connectors" depends="run-tests-derby-jcifs-connector"/>
     <target name="run-tests-derby-proprietary-connectors" depends="run-tests-derby-documentum-connector,run-tests-derby-filenet-connector,run-tests-derby-livelink-connector,run-tests-derby-memex-connector,run-tests-derby-meridio-connector,run-tests-derby-sharepoint-connector"/>
     
-    <target name="end-to-end-tests-derby" depends="run-filesystem-tests-derby,run-webcrawler-tests-derby,run-wiki-tests-derby,run-alfresco-tests-derby,run-cmis-tests-derby,run-sharepoint-tests-derby"/>
+    <target name="end-to-end-tests-derby" depends="run-filesystem-tests-derby,run-webcrawler-tests-derby,run-rss-tests-derby,run-wiki-tests-derby,run-alfresco-tests-derby,run-cmis-tests-derby,run-sharepoint-tests-derby"/>
 
     <target name="run-tests-postgresql-open-connectors" depends="run-tests-postgresql-activedirectory-connector,run-tests-postgresql-ldap-connector,run-tests-postgresql-alfresco-connector,run-tests-postgresql-cmis-connector,run-tests-postgresql-filesystem-connector,run-tests-postgresql-nullauthority-connector,run-tests-postgresql-nulloutput-connector,run-tests-postgresql-rss-connector,run-tests-postgresql-solr-connector,run-tests-postgresql-webcrawler-connector,run-tests-postgresql-wiki-connector,run-tests-postgresql-jdbc-connector,run-tests-postgresql-opensearchserver-connector,run-tests-postgresql-elasticsearch-connector"/>
     <target name="run-tests-postgresql-lgpl-connectors" depends="run-tests-postgresql-jcifs-connector"/>

Modified: manifoldcf/trunk/tests/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/tests/pom.xml?rev=1418639&r1=1418638&r2=1418639&view=diff
==============================================================================
--- manifoldcf/trunk/tests/pom.xml (original)
+++ manifoldcf/trunk/tests/pom.xml Sat Dec  8 10:42:55 2012
@@ -34,6 +34,7 @@
     <module>filesystem</module>
     <module>sharepoint</module>
     <module>rss</module>
+    <module>webcrawler</module>
   </modules>
   <build>
     <defaultGoal>integration-test</defaultGoal>

Added: manifoldcf/trunk/tests/rss/src/test/java/org/apache/manifoldcf/rss_tests/RSSSimpleCrawlDerbyIT.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/tests/rss/src/test/java/org/apache/manifoldcf/rss_tests/RSSSimpleCrawlDerbyIT.java?rev=1418639&view=auto
==============================================================================
--- manifoldcf/trunk/tests/rss/src/test/java/org/apache/manifoldcf/rss_tests/RSSSimpleCrawlDerbyIT.java (added)
+++ manifoldcf/trunk/tests/rss/src/test/java/org/apache/manifoldcf/rss_tests/RSSSimpleCrawlDerbyIT.java Sat Dec  8 10:42:55 2012
@@ -0,0 +1,61 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.rss_tests;
+
+import java.io.*;
+import java.util.*;
+import org.junit.*;
+
+/** Very basic sanity check: runs RSSSimpleCrawlTester while a MockRSSService is started. */
+public class RSSSimpleCrawlDerbyIT extends BaseDerby
+{
+
+  protected RSSSimpleCrawlTester tester;
+  protected MockRSSService rssService = null;
+  
+  public RSSSimpleCrawlDerbyIT()
+  {
+    tester = new RSSSimpleCrawlTester(mcfInstance);
+  }
+  
+  // Set up and tear down the mock RSS service around each test
+  
+  @Before
+  public void createRSSService()
+    throws Exception
+  {
+    rssService = new MockRSSService(10); // NOTE(review): meaning of 10 is not visible here; presumably documents per feed - confirm
+    rssService.start();
+  }
+  
+  @After
+  public void shutdownRSSService()
+    throws Exception
+  {
+    if (rssService != null)
+      rssService.stop();
+  }
+
+  @Test
+  public void simpleCrawl()
+    throws Exception
+  {
+    tester.executeTest();
+  }
+}

Added: manifoldcf/trunk/tests/rss/src/test/java/org/apache/manifoldcf/rss_tests/RSSSimpleCrawlTester.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/tests/rss/src/test/java/org/apache/manifoldcf/rss_tests/RSSSimpleCrawlTester.java?rev=1418639&view=auto
==============================================================================
--- manifoldcf/trunk/tests/rss/src/test/java/org/apache/manifoldcf/rss_tests/RSSSimpleCrawlTester.java (added)
+++ manifoldcf/trunk/tests/rss/src/test/java/org/apache/manifoldcf/rss_tests/RSSSimpleCrawlTester.java Sat Dec  8 10:42:55 2012
@@ -0,0 +1,120 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.rss_tests;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.agents.interfaces.*;
+import org.apache.manifoldcf.crawler.interfaces.*;
+import org.apache.manifoldcf.crawler.system.ManifoldCF;
+
+import org.apache.manifoldcf.crawler.connectors.rss.RSSConnector;
+
+import java.io.*;
+import java.util.*;
+
+/** Runs a small, simple RSS crawl: 10 feed seeds into a null output connection, then checks the processed-document count. */
+public class RSSSimpleCrawlTester
+{
+  protected org.apache.manifoldcf.crawler.tests.ManifoldCFInstance instance;
+  
+  public RSSSimpleCrawlTester(org.apache.manifoldcf.crawler.tests.ManifoldCFInstance instance)
+  {
+    this.instance = instance;
+  }
+  
+  public void executeTest()
+    throws Exception
+  {
+    // The RSS and null output connectors are expected to be registered already (presumably by the test base class).
+    // Now, create a local test job and run it.
+    IThreadContext tc = ThreadContextFactory.make();
+      
+    // Create a basic RSS repository connection, and save it.
+    IRepositoryConnectionManager mgr = RepositoryConnectionManagerFactory.make(tc);
+    IRepositoryConnection conn = mgr.create();
+    conn.setName("RSS Connection");
+    conn.setDescription("RSS Connection");
+    conn.setClassName("org.apache.manifoldcf.crawler.connectors.rss.RSSConnector");
+    conn.setMaxConnections(100);
+    ConfigParams cp = conn.getConfigParams();
+    cp.setParameter(RSSConnector.emailParameter,"somebody@somewhere.com");
+    cp.setParameter(RSSConnector.maxOpenParameter,"100");
+    cp.setParameter(RSSConnector.maxFetchesParameter,"1000000");
+    cp.setParameter(RSSConnector.bandwidthParameter,"1000000");
+    cp.setParameter(RSSConnector.robotsUsageParameter,"none");
+    // Now, save
+    mgr.save(conn);
+      
+    // Create a basic null output connection, and save it.
+    IOutputConnectionManager outputMgr = OutputConnectionManagerFactory.make(tc);
+    IOutputConnection outputConn = outputMgr.create();
+    outputConn.setName("Null Connection");
+    outputConn.setDescription("Null Connection");
+    outputConn.setClassName("org.apache.manifoldcf.agents.output.nullconnector.NullConnector");
+    outputConn.setMaxConnections(100);
+    // Now, save
+    outputMgr.save(outputConn);
+
+    // Create a job.
+    IJobManager jobManager = JobManagerFactory.make(tc);
+    IJobDescription job = jobManager.createJob();
+    job.setDescription("Test Job");
+    job.setConnectionName("RSS Connection");
+    job.setOutputConnectionName("Null Connection");
+    job.setType(job.TYPE_SPECIFIED);
+    job.setStartMethod(job.START_DISABLE);
+    job.setHopcountMode(job.HOPCOUNT_NEVERDELETE);
+      
+    // Now, set up the document specification.
+    DocumentSpecification ds = job.getSpecification();
+    // Set up 10 feed seeds (feed=0 through feed=9), all served from localhost:8189
+    for (int i = 0 ; i < 10 ; i++)
+    {
+      SpecificationNode sn = new SpecificationNode("feed");
+      sn.setAttribute("url","http://localhost:8189/rss/gen.php?type=feed&feed="+i);
+      ds.addChild(ds.getChildCount(),sn);
+    }
+      
+    // Set up the output specification.
+    OutputSpecification os = job.getOutputSpecification();
+    // Null output connections have no output specification, so this is a no-op.
+    
+    // Save the job.
+    jobManager.save(job);
+
+    // Now, start the job, and wait until it completes.
+    long startTime = System.currentTimeMillis();
+    jobManager.manualStart(job.getID());
+    instance.waitJobInactiveNative(jobManager,job.getID(),600000L);
+    System.err.println("Crawl required "+new Long(System.currentTimeMillis()-startTime).toString()+" milliseconds");
+
+    // Check to be sure we actually processed the right number of documents.
+    JobStatus status = jobManager.getStatus(job.getID());
+    // Expect 110 processed documents: presumably the 10 seed feeds plus 100 documents they reference - confirm against MockRSSService.
+    if (status.getDocumentsProcessed() != 110)
+      throw new ManifoldCFException("Wrong number of documents processed - expected 110, saw "+new Long(status.getDocumentsProcessed()).toString());
+      
+    // Now, delete the job.
+    jobManager.deleteJob(job.getID());
+    instance.waitJobDeletedNative(jobManager,job.getID(),60000L);
+      
+    // Cleanup is automatic by the base class, so we can feel free to leave jobs and connections lying around.
+  }
+  
+}

Propchange: manifoldcf/trunk/tests/rss/src/test/java/org/apache/manifoldcf/rss_tests/RSSSimpleCrawlTester.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/trunk/tests/rss/src/test/java/org/apache/manifoldcf/rss_tests/RSSSimpleCrawlTester.java
------------------------------------------------------------------------------
    svn:keywords = Id

Added: manifoldcf/trunk/tests/webcrawler/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/tests/webcrawler/pom.xml?rev=1418639&view=auto
==============================================================================
--- manifoldcf/trunk/tests/webcrawler/pom.xml (added)
+++ manifoldcf/trunk/tests/webcrawler/pom.xml Sat Dec  8 10:42:55 2012
@@ -0,0 +1,292 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <parent>
+    <groupId>org.apache.manifoldcf</groupId>
+    <artifactId>mcf-tests</artifactId>
+    <version>1.1-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+  <modelVersion>4.0.0</modelVersion>
+
+  <artifactId>mcf-webcrawler-test</artifactId>
+  <name>ManifoldCF - Test - Webcrawler</name>
+
+  <build>
+    <defaultGoal>integration-test</defaultGoal>
+    <plugins>
+      <plugin>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+           <execution>
+            <id>copy-war</id>
+            <phase>generate-resources</phase>
+            <goals>
+              <goal>copy</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>target/dependency</outputDirectory>
+              <artifactItems>
+                <artifactItem>
+                  <groupId>${project.groupId}</groupId>
+                  <artifactId>mcf-api-service</artifactId>
+                  <version>${project.version}</version>
+                  <type>war</type>
+                  <overWrite>false</overWrite>
+                </artifactItem>
+                <artifactItem>
+                  <groupId>${project.groupId}</groupId>
+                  <artifactId>mcf-authority-service</artifactId>
+                  <version>${project.version}</version>
+                  <type>war</type>
+                  <overWrite>false</overWrite>
+                </artifactItem>
+                <artifactItem>
+                  <groupId>${project.groupId}</groupId>
+                  <artifactId>mcf-crawler-ui</artifactId>
+                  <version>${project.version}</version>
+                  <type>war</type>
+                  <overWrite>false</overWrite>
+                </artifactItem>
+              </artifactItems>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-failsafe-plugin</artifactId>
+        <version>2.10</version>
+        <configuration>
+          <skipTests>${skipITs}</skipTests>
+          <systemPropertyVariables>
+            <crawlerWarPath>../dependency/mcf-crawler-ui-${project.version}.war</crawlerWarPath>
+            <authorityserviceWarPath>../dependency/mcf-authority-service-${project.version}.war</authorityserviceWarPath>
+            <apiWarPath>../dependency/mcf-api-service-${project.version}.war</apiWarPath>
+          </systemPropertyVariables>
+          <excludes>
+            <exclude>**/*Postgresql*.java</exclude>
+            <exclude>**/*MySQL*.java</exclude>
+          </excludes>
+          <forkMode>always</forkMode>
+          <workingDirectory>target/test-output</workingDirectory>
+        </configuration>
+        <executions>
+          <execution>
+            <id>integration-test</id>
+            <goals>
+              <goal>integration-test</goal>
+            </goals>
+          </execution>
+          <execution>
+            <id>verify</id>
+            <goals>
+              <goal>verify</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+  <dependencies>
+    <!-- Internal dependencies -->
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-api-service</artifactId>
+      <version>${project.version}</version>
+      <type>war</type>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-authority-service</artifactId>
+      <version>${project.version}</version>
+      <type>war</type>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-crawler-ui</artifactId>
+      <version>${project.version}</version>
+      <type>war</type>
+    </dependency>
+
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-agents</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-pull-agent</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency> <!-- NOTE(review): this is the webcrawler test module (mcf-webcrawler-test) but it depends on the RSS connector; presumably mcf-webcrawler-connector was intended - confirm -->
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-rss-connector</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-nulloutput-connector</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-server</artifactId>
+      <version>${jetty.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-util</artifactId>
+      <version>${jetty.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-webapp</artifactId>
+      <version>${jetty.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-servlet</artifactId>
+      <version>${jetty.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-http</artifactId>
+      <version>${jetty.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-io</artifactId>
+      <version>${jetty.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-security</artifactId>
+      <version>${jetty.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-continuation</artifactId>
+      <version>${jetty.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-xml</artifactId>
+      <version>${jetty.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>${slf4j.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-simple</artifactId>
+      <version>${slf4j.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jsp-api-2.1-glassfish</artifactId>
+      <version>${glassfish.version}</version>
+    </dependency>    
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jsp-2.1-glassfish</artifactId>
+      <version>${glassfish.version}</version>
+    </dependency>
+    
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-core</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-agents</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-pull-agent</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>${junit.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>postgresql</groupId>
+      <artifactId>postgresql</artifactId>
+      <version>${postgresql.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.hsqldb</groupId>
+      <artifactId>hsqldb</artifactId>
+      <version>${hsqldb.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.derby</groupId>
+      <artifactId>derby</artifactId>
+      <version>${derby.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>mysql</groupId>
+      <artifactId>mysql-connector-java</artifactId>
+      <version>${mysql.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.httpcomponents</groupId>
+      <artifactId>httpclient</artifactId>
+      <version>${httpcomponent.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+      <version>${commons-logging.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+      <version>${commons-codec.version}</version>
+    </dependency>
+  </dependencies>
+</project>
\ No newline at end of file

Propchange: manifoldcf/trunk/tests/webcrawler/pom.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/trunk/tests/webcrawler/pom.xml
------------------------------------------------------------------------------
    svn:keywords = Id