You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/04/29 18:10:16 UTC
svn commit: r1477170 - in /manifoldcf/trunk: ./
tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/
Author: kwright
Date: Mon Apr 29 16:10:10 2013
New Revision: 1477170
URL: http://svn.apache.org/r1477170
Log:
Load test which exercises throttling on a mixed set of pages, some of which have errors, and restarts the crawl often.
Added:
manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/ThrottlingDerbyLT.java (with props)
Modified:
manifoldcf/trunk/build.xml
manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/MockWebService.java
manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/ThrottlingPostgresqlLT.java
manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/ThrottlingTester.java
Modified: manifoldcf/trunk/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/build.xml?rev=1477170&r1=1477169&r2=1477170&view=diff
==============================================================================
--- manifoldcf/trunk/build.xml (original)
+++ manifoldcf/trunk/build.xml Mon Apr 29 16:10:10 2013
@@ -2307,6 +2307,10 @@
<ant dir="tests/webcrawler" target="run-postgresql"/>
</target>
+ <target name="run-webcrawler-loadtests-derby" depends="build-tests-framework,build-tests-webcrawler-connector,build-tests-nulloutput-connector,calculate-webcrawler-tests-condition" if="webcrawler-tests.include">
+ <ant dir="tests/webcrawler" target="run-load-derby"/>
+ </target>
+
<target name="run-webcrawler-loadtests-postgresql" depends="build-tests-framework,build-tests-webcrawler-connector,build-tests-nulloutput-connector,calculate-webcrawler-tests-condition" if="webcrawler-tests.include">
<ant dir="tests/webcrawler" target="run-load-postgresql"/>
</target>
Modified: manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/MockWebService.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/MockWebService.java?rev=1477170&r1=1477169&r2=1477170&view=diff
==============================================================================
--- manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/MockWebService.java (original)
+++ manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/MockWebService.java Mon Apr 29 16:10:10 2013
@@ -36,12 +36,17 @@ public class MockWebService
{
Server server;
WebServlet servlet;
-
+
public MockWebService(int docsPerLevel)
{
+ this(docsPerLevel, 10, false);
+ }
+
+ public MockWebService(int docsPerLevel, int maxLevels, boolean generateBadPages)
+ {
server = new Server(8191);
server.setThreadPool(new QueuedThreadPool(100));
- servlet = new WebServlet(docsPerLevel);
+ servlet = new WebServlet(docsPerLevel, maxLevels, generateBadPages);
ServletContextHandler context = new ServletContextHandler(ServletContextHandler.SESSIONS);
context.setContextPath("/web");
server.setHandler(context);
@@ -61,11 +66,15 @@ public class MockWebService
public static class WebServlet extends HttpServlet
{
- int docsPerLevel;
+ final int docsPerLevel;
+ final int maxLevels;
+ final boolean generateBadPages;
- public WebServlet(int docsPerLevel)
+ public WebServlet(int docsPerLevel, int maxLevels, boolean generateBadPages)
{
this.docsPerLevel = docsPerLevel;
+ this.maxLevels = maxLevels;
+ this.generateBadPages = generateBadPages;
}
@Override
@@ -96,7 +105,9 @@ public class MockWebService
{
throw new IOException("Level number must be a number: "+level);
}
-
+ if (theLevel >= maxLevels)
+ throw new IOException("Level number too big.");
+
int theItem;
try
{
@@ -119,43 +130,55 @@ public class MockWebService
throw new IOException("Doc number too big: "+theItem+" ; level "+theLevel+" ; docsPerLevel "+docsPerLevel);
// Generate the page
- res.setStatus(HttpServletResponse.SC_OK);
- res.setContentType("text/html; charset=utf-8");
- res.getWriter().printf("<html>\n");
- res.getWriter().printf(" <body>\n");
-
- res.getWriter().printf("This is doc number "+theItem+" and level number "+theLevel+" in site "+site+"\n");
-
- // Generate links to all parents
- int parentLevel = theLevel;
- int parentItem = theItem;
- while (parentLevel > 0)
- {
- parentLevel--;
- parentItem /= docsPerLevel;
- generateLink(res,site,parentLevel,parentItem);
- }
-
- // Temporary: Prevent links to children deeper than a certain level; this is to help
- // the debug process
- if (theLevel < 9)
+ if (generateBadPages && (theItem % 2) == 1)
{
- // Generate links to direct children
- for (int i = 0; i < docsPerLevel; i++)
+ // Generate a bad page. This is a page with a non-200 return code, and with some content
+ // > 1024 characters
+ res.setStatus(HttpServletResponse.SC_UNAUTHORIZED);
+ res.getWriter().printf("This is the error message for a 401 page.");
+ for (int i = 0; i < 100; i++)
{
- int docNumber = i + theItem * docsPerLevel;
- generateLink(res,site,theLevel+1,docNumber);
+ res.getWriter().printf(" Error message # "+i);
}
}
-
- // Generate some limited cross-links to other items at this level
- for (int i = theItem; i < maxDocsThisLevel && i < theItem + docsPerLevel; i++)
+ else
{
- generateLink(res,site,theLevel,i);
+ res.setStatus(HttpServletResponse.SC_OK);
+ res.setContentType("text/html; charset=utf-8");
+ res.getWriter().printf("<html>\n");
+ res.getWriter().printf(" <body>\n");
+
+ res.getWriter().printf("This is doc number "+theItem+" and level number "+theLevel+" in site "+site+"\n");
+
+ // Generate links to all parents
+ int parentLevel = theLevel;
+ int parentItem = theItem;
+ while (parentLevel > 0)
+ {
+ parentLevel--;
+ parentItem /= docsPerLevel;
+ generateLink(res,site,parentLevel,parentItem);
+ }
+
+ if (theLevel < maxLevels-1)
+ {
+ // Generate links to direct children
+ for (int i = 0; i < docsPerLevel; i++)
+ {
+ int docNumber = i + theItem * docsPerLevel;
+ generateLink(res,site,theLevel+1,docNumber);
+ }
+ }
+
+ // Generate some limited cross-links to other items at this level
+ for (int i = theItem; i < maxDocsThisLevel && i < theItem + docsPerLevel; i++)
+ {
+ generateLink(res,site,theLevel,i);
+ }
+
+ res.getWriter().printf(" </body>\n");
+ res.getWriter().printf("</html>\n");
}
-
- res.getWriter().printf(" </body>\n");
- res.getWriter().printf("</html>\n");
res.getWriter().flush();
}
catch (IOException e)
Added: manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/ThrottlingDerbyLT.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/ThrottlingDerbyLT.java?rev=1477170&view=auto
==============================================================================
--- manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/ThrottlingDerbyLT.java (added)
+++ manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/ThrottlingDerbyLT.java Mon Apr 29 16:10:10 2013
@@ -0,0 +1,61 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.webcrawler_tests;
+
+import java.io.*;
+import java.util.*;
+import org.junit.*;
+
+/** Load test that runs the ThrottlingTester crawl against a MockWebService, using the BaseDerby test base. */
+public class ThrottlingDerbyLT extends BaseDerby
+{
+
+ protected ThrottlingTester tester;
+ protected MockWebService webService = null;
+
+ public ThrottlingDerbyLT()
+ {
+ tester = new ThrottlingTester(mcfInstance);
+ }
+
+ // Set up and tear down the mock web service
+
+ @Before
+ public void createWebService()
+ throws Exception
+ {
+ webService = new MockWebService(10,2,true); // docsPerLevel=10, maxLevels=2, generateBadPages=true
+ webService.start();
+ }
+
+ @After
+ public void shutdownWebService()
+ throws Exception
+ {
+ if (webService != null) // still null if createWebService() threw before the assignment
+ webService.stop();
+ }
+
+ @Test
+ public void bigCrawl()
+ throws Exception
+ {
+ tester.executeTest(); // the crawl scenario itself is implemented in ThrottlingTester
+ }
+}
Propchange: manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/ThrottlingDerbyLT.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/ThrottlingDerbyLT.java
------------------------------------------------------------------------------
svn:keywords = Id
Modified: manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/ThrottlingPostgresqlLT.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/ThrottlingPostgresqlLT.java?rev=1477170&r1=1477169&r2=1477170&view=diff
==============================================================================
--- manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/ThrottlingPostgresqlLT.java (original)
+++ manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/ThrottlingPostgresqlLT.java Mon Apr 29 16:10:10 2013
@@ -40,7 +40,7 @@ public class ThrottlingPostgresqlLT exte
public void createWebService()
throws Exception
{
- webService = new MockWebService(10);
+ webService = new MockWebService(10,2,true);
webService.start();
}
Modified: manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/ThrottlingTester.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/ThrottlingTester.java?rev=1477170&r1=1477169&r2=1477170&view=diff
==============================================================================
--- manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/ThrottlingTester.java (original)
+++ manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/ThrottlingTester.java Mon Apr 29 16:10:10 2013
@@ -29,7 +29,7 @@ import org.apache.manifoldcf.crawler.con
import java.io.*;
import java.util.*;
-/** This is a 10000-document crawl with throttling */
+/** This is a repeated 100-document crawl with throttling */
public class ThrottlingTester
{
protected org.apache.manifoldcf.crawler.tests.ManifoldCFInstance instance;
@@ -97,8 +97,7 @@ public class ThrottlingTester
job.setOutputConnectionName("Null Connection");
job.setType(job.TYPE_SPECIFIED);
job.setStartMethod(job.START_DISABLE);
- job.setHopcountMode(job.HOPCOUNT_ACCURATE);
- job.addHopCountFilter("link",new Long(2));
+ job.setHopcountMode(job.HOPCOUNT_NEVERDELETE);
// Now, set up the document specification.
DocumentSpecification ds = job.getSpecification();
@@ -106,7 +105,7 @@ public class ThrottlingTester
// Set up 100 seeds
SpecificationNode sn = new SpecificationNode(WebcrawlerConfig.NODE_SEEDS);
StringBuilder sb = new StringBuilder();
- for (int i = 0 ; i < 100 ; i++)
+ for (int i = 0 ; i < 10 ; i++)
{
sb.append("http://localhost:8191/web/gen.php?site="+i+"&level=0&item=0\n");
}
@@ -128,21 +127,23 @@ public class ThrottlingTester
// Save the job.
jobManager.save(job);
- // Now, start the job, and wait until it completes.
- long startTime = System.currentTimeMillis();
- jobManager.manualStart(job.getID());
- instance.waitJobInactiveNative(jobManager,job.getID(),220000000L);
- System.err.println("Crawl required "+new Long(System.currentTimeMillis()-startTime).toString()+" milliseconds");
-
- // Check to be sure we actually processed the right number of documents.
- JobStatus status = jobManager.getStatus(job.getID());
- // Four levels deep from 100 site seeds: Each site seed has 1 + 10 + 100 + 1000 = 1111 documents, so 100 has 111100 total, and 11100 processed
- if (status.getDocumentsProcessed() != 11100)
- throw new ManifoldCFException("Wrong number of documents processed - expected 111100, saw "+new Long(status.getDocumentsProcessed()).toString());
+ for (int i = 0; i < 100; i++)
+ {
+ System.err.println("Iteration # "+i);
+ // Now, start the job, and wait until it completes.
+ long startTime = System.currentTimeMillis();
+ jobManager.manualStart(job.getID());
+ instance.waitJobInactiveNative(jobManager,job.getID(),300000L);
+ System.err.println(" Crawl required "+new Long(System.currentTimeMillis()-startTime).toString()+" milliseconds");
+
+ // Report how many documents were processed in this iteration (the count is logged, not asserted).
+ JobStatus status = jobManager.getStatus(job.getID());
+ System.err.println(" "+new Long(status.getDocumentsProcessed())+" documents processed");
+ }
// Now, delete the job.
jobManager.deleteJob(job.getID());
- instance.waitJobDeletedNative(jobManager,job.getID(),18000000L);
+ instance.waitJobDeletedNative(jobManager,job.getID(),300000L);
// Cleanup is automatic by the base class, so we can feel free to leave jobs and connections lying around.
}