You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2012/12/02 17:58:00 UTC

svn commit: r1416199 [3/3] - in /manifoldcf/trunk: connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcraw...

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties?rev=1416199&r1=1416198&r2=1416199&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties Sun Dec  2 16:57:56 2012
@@ -52,8 +52,9 @@ WebcrawlerConnector.SessionBasedAccessCr
 WebcrawlerConnector.LoginPages=Login pages
 WebcrawlerConnector.LoginURLRegularExpression=Login URL regular expression
 WebcrawlerConnector.PageType=Page type
-WebcrawlerConnector.FormNamelinkTargetRegularExpression=Form name/link target regular expression
+WebcrawlerConnector.FormNamelinkTargetRegularExpression=Identification regular expression
 WebcrawlerConnector.OverrideFormParameters=Override form parameters
+WebcrawlerConnector.OverrideTargetURL=Override target URL
 WebcrawlerConnector.NoLoginPagesSpecified=No login pages specified
 WebcrawlerConnector.NoSessionBasedAccessCredentials=No session-based access credentials
 WebcrawlerConnector.TrustCertificates=Trust certificates:
@@ -135,9 +136,10 @@ WebcrawlerConnector.ForLoginPage= for lo
 WebcrawlerConnector.ForCredential= for credential #
 WebcrawlerConnector.AddParameterToLoginPage=Add parameter to login page #
 WebcrawlerConnector.AddLoginPageToCredential=Add login page to credential #
-WebcrawlerConnector.FormName=Form name
+WebcrawlerConnector.FormName=Form name/id/action
 WebcrawlerConnector.LinkTarget=Link target
-WebcrawlerConnector.Redirection=Redirection
+WebcrawlerConnector.RedirectionTo=Redirection to
+WebcrawlerConnector.PageContent=Page content
 WebcrawlerConnector.AddSessionAuthenticationUrlRegularExpression=Add session authentication url regular expression
 WebcrawlerConnector.DeleteTrustUrlRegularExpression=Delete trust url regular expression #
 WebcrawlerConnector.TrustEverything=Trust everything
@@ -158,7 +160,7 @@ WebcrawlerConnector.Exclusions=Exclusion
 WebcrawlerConnector.Security=Security
 WebcrawlerConnector.Metadata=Metadata
 WebcrawlerConnector.FoundAnIllegalRegularExpressionIn=Found an illegal regular expression in
-WebcrawlerConnector.ErrorWas='.  Error was: 
+WebcrawlerConnector.ErrorWas=Error was: 
 WebcrawlerConnector.TypeInAnAccessToken=Type in an access token
 WebcrawlerConnector.TypeInMetadataName=Type in metadata name
 WebcrawlerConnector.TypeInMetadataValue=Type in metadata value

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties?rev=1416199&r1=1416198&r2=1416199&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties Sun Dec  2 16:57:56 2012
@@ -54,6 +54,7 @@ WebcrawlerConnector.LoginURLRegularExpre
 WebcrawlerConnector.PageType=ページタイプ
 WebcrawlerConnector.FormNamelinkTargetRegularExpression=フォーム名/リンクターゲット正規表現
 WebcrawlerConnector.OverrideFormParameters=フォーム引数をオーバーライド
+WebcrawlerConnector.OverrideTargetURL=Override target URL
 WebcrawlerConnector.NoLoginPagesSpecified=ログインページが指定されていません
 WebcrawlerConnector.NoSessionBasedAccessCredentials=セッションベースアクセスサーティフィケートがありません
 WebcrawlerConnector.TrustCertificates=トラストサーティフィケート:
@@ -135,9 +136,10 @@ WebcrawlerConnector.ForLoginPage= ロ
 WebcrawlerConnector.ForCredential= 認証 #
 WebcrawlerConnector.AddParameterToLoginPage=ログインページに引数を追加 #
 WebcrawlerConnector.AddLoginPageToCredential=認証にログインページを追加: #
-WebcrawlerConnector.FormName= ãƒ•ã‚©ãƒ¼ãƒ å
-WebcrawlerConnector.LinkTarget= ãƒªãƒ³ã‚¯ã‚¿ãƒ¼ã‚²ãƒƒãƒˆ
-WebcrawlerConnector.Redirection= ãƒªãƒ€ã‚¤ãƒ¬ã‚¯ã‚·ãƒ§ãƒ³
+WebcrawlerConnector.FormName=フォーム名
+WebcrawlerConnector.LinkTarget=リンクターゲット
+WebcrawlerConnector.RedirectionTo=リダイレクション
+WebcrawlerConnector.PageContent=Page content
 WebcrawlerConnector.AddSessionAuthenticationUrlRegularExpression=セッション認証URI正規表現
 WebcrawlerConnector.DeleteTrustUrlRegularExpression=信用URI正規表現式を削除: #
 WebcrawlerConnector.TrustEverything=すべて信用
@@ -157,7 +159,7 @@ WebcrawlerConnector.Exclusions=除å�
 WebcrawlerConnector.Security=セキュリティ
 WebcrawlerConnector.Metadata=メタデータ
 WebcrawlerConnector.FoundAnIllegalRegularExpressionIn=不正な正規表現式
-WebcrawlerConnector.ErrorWas=。エラー: 
+WebcrawlerConnector.ErrorWas=エラー: 
 WebcrawlerConnector.TypeInAnAccessToken=アクセストークンの入力
 WebcrawlerConnector.TypeInMetadataName=メタデータ名の入力
 WebcrawlerConnector.TypeInMetadataValue=メタデータ値の入力

Added: manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/MockSessionWebService.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/MockSessionWebService.java?rev=1416199&view=auto
==============================================================================
--- manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/MockSessionWebService.java (added)
+++ manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/MockSessionWebService.java Sun Dec  2 16:57:56 2012
@@ -0,0 +1,223 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.webcrawler_tests;
+
+import org.eclipse.jetty.servlet.ServletHolder;
+import org.eclipse.jetty.server.Server;
+import org.eclipse.jetty.servlet.ServletContextHandler;
+import org.eclipse.jetty.util.thread.QueuedThreadPool;
+
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+
+import java.io.*;
+import java.util.*;
+
+/** Mock web service that requires session authentication */
+public class MockSessionWebService
+{
+  Server server;
+  SessionWebServlet servlet;
+    
+  public MockSessionWebService(int numContentDocs, String userName, String password)
+  {
+    server = new Server(8191);  // fixed port; SessionTester seeds its crawl at http://localhost:8191/web/index.html
+    server.setThreadPool(new QueuedThreadPool(100));
+    servlet = new SessionWebServlet(numContentDocs,userName,password);
+    ServletContextHandler context = new ServletContextHandler(ServletContextHandler.SESSIONS);
+    context.setContextPath("/web");  // the URLs emitted by the servlet's generate* methods all start with /web
+    server.setHandler(context);
+    context.addServlet(new ServletHolder(servlet), "/");  // one servlet handles every path under the context
+  }
+    
+  public void start() throws Exception
+  {
+    server.start();
+  }
+    
+  public void stop() throws Exception
+  {
+    server.stop();
+  }
+
+  
+  public static class SessionWebServlet extends HttpServlet
+  {
+    protected final int contentPageCount;
+    protected final String loginUser;
+    protected final String loginPassword;
+    
+    public SessionWebServlet(int contentPageCount, String loginUser, String loginPassword)
+    {
+      this.contentPageCount = contentPageCount;
+      this.loginUser = loginUser;
+      this.loginPassword = loginPassword;
+    }
+    
+    @Override
+    public void service(HttpServletRequest req, HttpServletResponse res)
+      throws IOException
+    {
+      try {
+        // This mock web service is meant to resolve the following urls:
+        // /index.html (an index of all N content pages)
+        // /protectedcontent.html?id=N  (N content pages)
+        // /loginpage.html (the login page, rendered either as a form,
+        //    or as a redirection back to the content page, or as a redirection to the index page)
+        
+        // Get path part of request URL
+        // MHL (placeholder): the path is not read from req yet, so pathPart stays "" and no branch below can match.
+        String pathPart = "";
+
+        if (pathPart.equals("/loginpage.html"))
+        {
+          // Login page logic: parse the optional "id" plus the submitted "user"/"password" parameters
+          String id = req.getParameter("id");
+          Integer idNumber;
+          if (id == null)
+            idNumber = null;
+          else
+            idNumber = new Integer(id);  // NOTE(review): a non-numeric id throws NumberFormatException, which the IOException catch below does not handle
+          
+          String userName = req.getParameter("user");
+          String password = req.getParameter("password");
+          
+          // MHL (placeholder): idNumber, userName and password are read but not used yet; no response is generated.
+        }
+        else if (pathPart.equals("/protectedcontent.html"))
+        {
+          // Content page logic: an "id" parameter is required
+          String id = req.getParameter("id");
+          if (id == null)
+          {
+            generateBadArgumentResponse(res);
+          }
+          // MHL (placeholder): page generation is missing. NOTE(review): nothing returns after the bad-argument response above, so code added here would still run for a null id.
+        }
+        else if (pathPart.equals("/index.html"))
+        {
+          // Index logic
+          // MHL (placeholder): index generation is not written yet.
+        }
+      }
+      catch (IOException e)
+      {
+        e.printStackTrace();  // dump the failure to stderr, then rethrow unchanged
+        throw e;
+      }
+    }
+
+    protected static void generateBadArgumentResponse(HttpServletResponse res)
+      throws IOException
+    {
+      // MHL (placeholder): nothing is written to res yet, so calling this currently has no effect on the response.
+    }
+    
+    protected static void generateLoginRedirectPage(HttpServletResponse res, Integer returnID)
+      throws IOException
+    {
+      String redirectTarget;
+      if (returnID == null)
+        redirectTarget = "/web/index.html";
+      else
+        redirectTarget = "/web/protectedcontent.html?id="+returnID;
+      // Picks where a login should land: the index when returnID is null, else that content page. The index path must be "/web/index.html", the path service() dispatches on.
+      // MHL (placeholder): redirectTarget is computed but no redirect is written to res yet.
+    }
+    
+    protected static void generateLoginFormPage(HttpServletResponse res, Integer returnID)
+      throws IOException
+    {
+      String actionURI = "/web/loginpage.html";  // the form's action points back at the login page URL
+      if (returnID != null)
+        actionURI += "?id="+returnID;  // keep the id on the action URL when one was supplied
+      res.setStatus(HttpServletResponse.SC_OK);
+      res.setContentType("text/html; charset=utf-8");
+
+      res.getWriter().printf("<html>\n");
+      res.getWriter().printf("  <body>\n");
+      res.getWriter().printf("    <form name=\"login\" action=\""+actionURI+"\">\n");  // NOTE(review): actionURI is spliced into the printf format string; fine only while it contains no '%'
+      res.getWriter().printf("      User name: <input type=\"text\" name=\"user\" value=\"\" size=\"20\"/>\n");  // same name as the "user" parameter read in service()
+      res.getWriter().printf("      Password: <input type=\"password\" name=\"password\" value=\"\" size=\"20\"/>\n");  // same name as the "password" parameter read in service()
+      res.getWriter().printf("      <input type=\"submit\"/>\n");
+      res.getWriter().printf("    </form>\n");
+      res.getWriter().printf("  </body>\n");
+      res.getWriter().printf("</html>\n");
+      
+      res.getWriter().flush();
+
+    }
+    
+    protected static void generateContentRedirectPage(HttpServletResponse res, int itemNumber)
+      throws IOException
+    {
+      String redirectTarget = "/web/loginpage.html?id="+itemNumber;  // login page URL carrying the requested item's id
+      // MHL (placeholder): redirectTarget is computed but no redirect is written to res yet.
+    }
+
+    protected static void generateContentDisplayPage(HttpServletResponse res, int itemNumber)
+      throws IOException
+    {
+      res.setStatus(HttpServletResponse.SC_OK);
+      res.setContentType("text/html; charset=utf-8");
+
+      res.getWriter().printf("<html>\n");
+      res.getWriter().printf("  <body>This is the document content for item "+itemNumber+"</body>");  // the whole page body is this one sentence naming the item
+      res.getWriter().printf("</html>\n");
+      
+      res.getWriter().flush();
+    }
+    
+    protected static void generateIndexRedirectPage(HttpServletResponse res)
+      throws IOException
+    {
+      String redirectTarget = "/web/loginpage.html";  // login page URL without an id parameter
+      // MHL (placeholder): redirectTarget is computed but no redirect is written to res yet.
+    }
+    
+    protected static void generateIndexDisplayPage(HttpServletResponse res, int countItems)
+      throws IOException
+    {
+      res.setStatus(HttpServletResponse.SC_OK);
+      res.setContentType("text/html; charset=utf-8");
+
+      res.getWriter().printf("<html>\n");
+      res.getWriter().printf("  <body>\n");
+
+      for (int i = 0; i < countItems; i++)  // one link per item, numbered 0 .. countItems-1
+      {
+        generateContentLink(res,i);
+      }
+      
+      res.getWriter().printf("  </body>\n");
+      res.getWriter().printf("</html>\n");
+      res.getWriter().flush();
+
+    }
+    
+    protected static void generateContentLink(HttpServletResponse res, int itemNumber)
+      throws IOException
+    {
+      res.getWriter().printf("    <a href=\"/web/protectedcontent.html?id="+itemNumber+"\">Item "+itemNumber+"</a>\n");  // anchor to the protected content page for this item; not flushed here
+    }
+
+  }
+}

Propchange: manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/MockSessionWebService.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/MockSessionWebService.java
------------------------------------------------------------------------------
    svn:keywords = Id

Modified: manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/MockWebService.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/MockWebService.java?rev=1416199&r1=1416198&r2=1416199&view=diff
==============================================================================
--- manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/MockWebService.java (original)
+++ manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/MockWebService.java Sun Dec  2 16:57:56 2012
@@ -73,90 +73,90 @@ public class MockWebService
       throws IOException
     {
       try {
-      String resourceName = null;
-      
-      String site = req.getParameter("site");     // Site ID
-      if (site == null)
-        throw new IOException("Site ID parameter must be set");
-
-      String level = req.getParameter("level");   // Level #
-      if (site == null)
-        throw new IOException("Level number parameter must be set");
-
-      String item = req.getParameter("item");    // Item #
-      if (item == null)
-        throw new IOException("Item number parameter must be set");
+        String resourceName = null;
+        
+        String site = req.getParameter("site");     // Site ID
+        if (site == null)
+          throw new IOException("Site ID parameter must be set");
+
+        String level = req.getParameter("level");   // Level #
+        if (site == null)
+          throw new IOException("Level number parameter must be set");
+
+        String item = req.getParameter("item");    // Item #
+        if (item == null)
+          throw new IOException("Item number parameter must be set");
 
-      int theLevel;
-      try
-      {
-        theLevel = Integer.parseInt(level);
-      }
-      catch (NumberFormatException e)
-      {
-        throw new IOException("Level number must be a number: "+level);
-      }
-      
-      int theItem;
-      try
-      {
-        theItem = Integer.parseInt(item);
-      }
-      catch (NumberFormatException e)
-      {
-        throw new IOException("Item number must be a number: "+item);
-      }
+        int theLevel;
+        try
+        {
+          theLevel = Integer.parseInt(level);
+        }
+        catch (NumberFormatException e)
+        {
+          throw new IOException("Level number must be a number: "+level);
+        }
+        
+        int theItem;
+        try
+        {
+          theItem = Integer.parseInt(item);
+        }
+        catch (NumberFormatException e)
+        {
+          throw new IOException("Item number must be a number: "+item);
+        }
 
-      // Formulate the response.
-      // First, calculate the number of docs on the current level
-      int maxDocsThisLevel = 1;
-      for (int i = 0 ; i < theLevel ; i++)
-      {
-        maxDocsThisLevel *= docsPerLevel;
-      }
-      if (theItem >= maxDocsThisLevel)
-        // Not legal
-        throw new IOException("Doc number too big: "+theItem+" ; level "+theLevel+" ; docsPerLevel "+docsPerLevel);
-
-      // Generate the page
-      res.setStatus(HttpServletResponse.SC_OK);
-      res.setContentType("text/html; charset=utf-8");
-      res.getWriter().printf("<html>\n");
-      res.getWriter().printf("  <body>\n");
-
-      res.getWriter().printf("This is doc number "+theItem+" and level number "+theLevel+" in site "+site+"\n");
-
-      // Generate links to all parents
-      int parentLevel = theLevel;
-      int parentItem = theItem;
-      while (parentLevel > 0)
-      {
-        parentLevel--;
-        parentItem /= docsPerLevel;
-	generateLink(res,site,parentLevel,parentItem);
-      }
-      
-      // Temporary: Prevent links to children deeper than a certain level; this is to help
-      // the debug process
-      if (theLevel < 9)
-      {
-        // Generate links to direct children
-        for (int i = 0; i < docsPerLevel; i++)
+        // Formulate the response.
+        // First, calculate the number of docs on the current level
+        int maxDocsThisLevel = 1;
+        for (int i = 0 ; i < theLevel ; i++)
         {
-          int docNumber = i + theItem * docsPerLevel;
-          generateLink(res,site,theLevel+1,docNumber);
+          maxDocsThisLevel *= docsPerLevel;
         }
-      }
-      
-      // Generate some limited cross-links to other items at this level
-      for (int i = theItem; i < maxDocsThisLevel && i < theItem + docsPerLevel; i++)
-      {
-        generateLink(res,site,theLevel,i);
-      }
-      
-      res.getWriter().printf("  </body>\n");
-      res.getWriter().printf("</html>\n");
-      res.getWriter().flush();
+        if (theItem >= maxDocsThisLevel)
+          // Not legal
+          throw new IOException("Doc number too big: "+theItem+" ; level "+theLevel+" ; docsPerLevel "+docsPerLevel);
+
+        // Generate the page
+        res.setStatus(HttpServletResponse.SC_OK);
+        res.setContentType("text/html; charset=utf-8");
+        res.getWriter().printf("<html>\n");
+        res.getWriter().printf("  <body>\n");
+
+        res.getWriter().printf("This is doc number "+theItem+" and level number "+theLevel+" in site "+site+"\n");
+
+        // Generate links to all parents
+        int parentLevel = theLevel;
+        int parentItem = theItem;
+        while (parentLevel > 0)
+        {
+          parentLevel--;
+          parentItem /= docsPerLevel;
+          generateLink(res,site,parentLevel,parentItem);
+        }
+        
+        // Temporary: Prevent links to children deeper than a certain level; this is to help
+        // the debug process
+        if (theLevel < 9)
+        {
+          // Generate links to direct children
+          for (int i = 0; i < docsPerLevel; i++)
+          {
+            int docNumber = i + theItem * docsPerLevel;
+            generateLink(res,site,theLevel+1,docNumber);
+          }
+        }
+        
+        // Generate some limited cross-links to other items at this level
+        for (int i = theItem; i < maxDocsThisLevel && i < theItem + docsPerLevel; i++)
+        {
+          generateLink(res,site,theLevel,i);
+        }
+        
+        res.getWriter().printf("  </body>\n");
+        res.getWriter().printf("</html>\n");
+        res.getWriter().flush();
       }
       catch (IOException e)
       {

Added: manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/SessionTester.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/SessionTester.java?rev=1416199&view=auto
==============================================================================
--- manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/SessionTester.java (added)
+++ manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/SessionTester.java Sun Dec  2 16:57:56 2012
@@ -0,0 +1,180 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.webcrawler_tests;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.agents.interfaces.*;
+import org.apache.manifoldcf.crawler.interfaces.*;
+import org.apache.manifoldcf.crawler.system.ManifoldCF;
+
+import java.io.*;
+import java.util.*;
+
+import org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConnector;
+import org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConfig;
+
+/** Run a session-based crawl */
+public class SessionTester
+{
+  protected org.apache.manifoldcf.crawler.tests.ManifoldCFInstance instance;
+  
+  public SessionTester(org.apache.manifoldcf.crawler.tests.ManifoldCFInstance instance)
+  {
+    this.instance = instance;
+  }
+  
+  public void executeTest()
+    throws Exception
+  {
+    // Hey, we were able to install the web connector etc.
+    // Now, create a local test job and run it.
+    IThreadContext tc = ThreadContextFactory.make();
+      
+    // Create a web crawler repository connection, and save it.
+    IRepositoryConnectionManager mgr = RepositoryConnectionManagerFactory.make(tc);
+    IRepositoryConnection conn = mgr.create();
+    conn.setName("Web Connection");
+    conn.setDescription("Web Connection");
+    conn.setClassName("org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConnector");
+    conn.setMaxConnections(100);
+    ConfigParams cp = conn.getConfigParams();
+    
+    cp.setParameter(WebcrawlerConfig.PARAMETER_EMAIL,"someone@somewhere.com");
+    cp.setParameter(WebcrawlerConfig.PARAMETER_ROBOTSUSAGE,"none");
+    
+    // Set session auth settings
+    ConfigurationNode accessCredential = new ConfigurationNode(WebcrawlerConfig.NODE_ACCESSCREDENTIAL);
+    accessCredential.setAttribute(WebcrawlerConfig.ATTR_TYPE,WebcrawlerConfig.ATTRVALUE_SESSION);
+    accessCredential.setAttribute(WebcrawlerConfig.ATTR_URLREGEXP,"/web/");
+    
+    // Add auth pages to accessCredential node; five are added below, in this order
+    
+    // Redirection to login page
+    ConfigurationNode redirectToLogin = new ConfigurationNode(WebcrawlerConfig.NODE_AUTHPAGE);
+    redirectToLogin.setAttribute(WebcrawlerConfig.ATTR_URLREGEXP,"/protectedcontent\\.html\\?");
+    redirectToLogin.setAttribute(WebcrawlerConfig.ATTR_TYPE,WebcrawlerConfig.ATTRVALUE_REDIRECTION);
+    redirectToLogin.setAttribute(WebcrawlerConfig.ATTR_MATCHREGEXP,"/loginpage\\.html\\?");
+    accessCredential.addChild(accessCredential.getChildCount(),redirectToLogin);
+    
+    // Redirection to login page from index
+    ConfigurationNode redirectFromIndex = new ConfigurationNode(WebcrawlerConfig.NODE_AUTHPAGE);
+    redirectFromIndex.setAttribute(WebcrawlerConfig.ATTR_URLREGEXP,"/index\\.html$");
+    redirectFromIndex.setAttribute(WebcrawlerConfig.ATTR_TYPE,WebcrawlerConfig.ATTRVALUE_REDIRECTION);
+    redirectFromIndex.setAttribute(WebcrawlerConfig.ATTR_MATCHREGEXP,"/loginpage\\.html$");
+    accessCredential.addChild(accessCredential.getChildCount(),redirectFromIndex);
+
+    // Login page
+    ConfigurationNode loginPage = new ConfigurationNode(WebcrawlerConfig.NODE_AUTHPAGE);
+    loginPage.setAttribute(WebcrawlerConfig.ATTR_URLREGEXP,"/loginpage\\.html(\\?|$)");
+    loginPage.setAttribute(WebcrawlerConfig.ATTR_TYPE,WebcrawlerConfig.ATTRVALUE_FORM);
+    loginPage.setAttribute(WebcrawlerConfig.ATTR_MATCHREGEXP,"/loginpage\\.html(\\?|$)");
+    // Set credentials; the parameter names are the input names of the mock login form
+    ConfigurationNode userParameter = new ConfigurationNode(WebcrawlerConfig.NODE_AUTHPARAMETER);
+    userParameter.setAttribute(WebcrawlerConfig.ATTR_NAMEREGEXP,"user");
+    userParameter.setAttribute(WebcrawlerConfig.ATTR_VALUE,"foo");  // NOTE(review): presumably must equal the userName given to MockSessionWebService - confirm
+    loginPage.addChild(loginPage.getChildCount(),userParameter);
+    ConfigurationNode passwordParameter = new ConfigurationNode(WebcrawlerConfig.NODE_AUTHPARAMETER);
+    passwordParameter.setAttribute(WebcrawlerConfig.ATTR_NAMEREGEXP,"password");
+    passwordParameter.setAttribute(WebcrawlerConfig.ATTR_VALUE,"bar");  // NOTE(review): presumably must equal the password given to MockSessionWebService - confirm
+    loginPage.addChild(loginPage.getChildCount(),passwordParameter);
+    accessCredential.addChild(accessCredential.getChildCount(),loginPage);
+
+    // Redirection from login page to content
+    ConfigurationNode redirectFromLogin = new ConfigurationNode(WebcrawlerConfig.NODE_AUTHPAGE);
+    redirectFromLogin.setAttribute(WebcrawlerConfig.ATTR_URLREGEXP,"/loginpage\\.html\\?");
+    redirectFromLogin.setAttribute(WebcrawlerConfig.ATTR_TYPE,WebcrawlerConfig.ATTRVALUE_REDIRECTION);
+    redirectFromLogin.setAttribute(WebcrawlerConfig.ATTR_MATCHREGEXP,"/protectedcontent\\.html\\?");
+    accessCredential.addChild(accessCredential.getChildCount(),redirectFromLogin);
+
+    // Redirection from login page to index
+    ConfigurationNode redirectToIndexFromLogin = new ConfigurationNode(WebcrawlerConfig.NODE_AUTHPAGE);
+    redirectToIndexFromLogin.setAttribute(WebcrawlerConfig.ATTR_URLREGEXP,"/loginpage\\.html$");
+    redirectToIndexFromLogin.setAttribute(WebcrawlerConfig.ATTR_TYPE,WebcrawlerConfig.ATTRVALUE_REDIRECTION);
+    redirectToIndexFromLogin.setAttribute(WebcrawlerConfig.ATTR_MATCHREGEXP,"/index\\.html$");
+    accessCredential.addChild(accessCredential.getChildCount(),redirectToIndexFromLogin);
+
+    cp.addChild(cp.getChildCount(),accessCredential);
+    
+    // Now, save
+    mgr.save(conn);
+      
+    // Create a basic null output connection, and save it.
+    IOutputConnectionManager outputMgr = OutputConnectionManagerFactory.make(tc);
+    IOutputConnection outputConn = outputMgr.create();
+    outputConn.setName("Null Connection");
+    outputConn.setDescription("Null Connection");
+    outputConn.setClassName("org.apache.manifoldcf.agents.output.nullconnector.NullConnector");
+    outputConn.setMaxConnections(100);
+    // Now, save
+    outputMgr.save(outputConn);
+
+    // Create a job that ties the two connections above together by name.
+    IJobManager jobManager = JobManagerFactory.make(tc);
+    IJobDescription job = jobManager.createJob();
+    job.setDescription("Test Job");
+    job.setConnectionName("Web Connection");
+    job.setOutputConnectionName("Null Connection");
+    job.setType(job.TYPE_SPECIFIED);
+    job.setStartMethod(job.START_DISABLE);
+    job.setHopcountMode(job.HOPCOUNT_NEVERDELETE);
+
+    // Now, set up the document specification.
+    DocumentSpecification ds = job.getSpecification();
+    
+    // Set up the seed
+    SpecificationNode sn = new SpecificationNode(WebcrawlerConfig.NODE_SEEDS);
+    sn.setValue("http://localhost:8191/web/index.html\n");  // port 8191 and the /web context path are the ones MockSessionWebService configures
+    ds.addChild(ds.getChildCount(),sn);
+    
+    sn = new SpecificationNode(WebcrawlerConfig.NODE_INCLUDES);
+    sn.setValue(".*\n");
+    ds.addChild(ds.getChildCount(),sn);
+    
+    sn = new SpecificationNode(WebcrawlerConfig.NODE_INCLUDESINDEX);
+    sn.setValue(".*\n");
+    ds.addChild(ds.getChildCount(),sn);
+
+    // Set up the output specification.
+    OutputSpecification os = job.getOutputSpecification();
+    // Null output connections have no output specification, so this is a no-op.
+    
+    // Save the job.
+    jobManager.save(job);
+
+    // Now, start the job, and wait until it completes.
+    long startTime = System.currentTimeMillis();
+    jobManager.manualStart(job.getID());
+    instance.waitJobInactiveNative(jobManager,job.getID(),600000L);
+    System.err.println("Crawl required "+new Long(System.currentTimeMillis()-startTime).toString()+" milliseconds");
+
+    // Check to be sure we actually processed the right number of documents.
+    JobStatus status = jobManager.getStatus(job.getID());
+    if (status.getDocumentsProcessed() != 101)  // NOTE(review): presumably 100 content pages plus the index page - confirm against the numContentDocs the caller passes to the mock service
+    {
+      throw new ManifoldCFException("Wrong number of documents processed - expected 101, saw "+new Long(status.getDocumentsProcessed()).toString());
+    }
+    
+    // Now, delete the job.
+    jobManager.deleteJob(job.getID());
+    instance.waitJobDeletedNative(jobManager,job.getID(),600000L);
+      
+    // Cleanup is automatic by the base class, so we can feel free to leave jobs and connections lying around.
+  }
+  
+}

Propchange: manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/SessionTester.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/trunk/tests/webcrawler/src/test/java/org/apache/manifoldcf/webcrawler_tests/SessionTester.java
------------------------------------------------------------------------------
    svn:keywords = Id