You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2012/06/18 23:18:48 UTC

svn commit: r1351490 - in /manifoldcf/trunk: ./ connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/

Author: kwright
Date: Mon Jun 18 21:18:47 2012
New Revision: 1351490

URL: http://svn.apache.org/viewvc?rev=1351490&view=rev
Log:
Add proxy support to Web Connector.  Fix for CONNECTORS-483.

Modified:
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/IThrottledConnection.java
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1351490&r1=1351489&r2=1351490&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Mon Jun 18 21:18:47 2012
@@ -3,6 +3,9 @@ $Id$
 
 ======================= 0.6-dev =====================
 
+CONNECTORS-483: Add NTLM proxy support for Web Connector.
+(Karl Wright)
+
 CONNECTORS-482: Need to include at least a portion of the HTTP
 body in history message whenever a non-200 HTTP code comes back.
 (Karl Wright)

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/IThrottledConnection.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/IThrottledConnection.java?rev=1351490&r1=1351489&r2=1351490&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/IThrottledConnection.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/IThrottledConnection.java Mon Jun 18 21:18:47 2012
@@ -64,7 +64,8 @@ public interface IThrottledConnection
   */
   public void executeFetch(String urlPath, String userAgent, String from, int connectionTimeoutMilliseconds,
     int socketTimeoutMilliseconds, boolean redirectOK, String host, FormData formData,
-    LoginCookies loginCookies)
+    LoginCookies loginCookies,
+    String proxyHost, int proxyPort, String proxyAuthDomain, String proxyAuthUsername, String proxyAuthPassword)
     throws ManifoldCFException, ServiceInterruption;
 
   /** Get the http response code.

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java?rev=1351490&r1=1351489&r2=1351490&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java Mon Jun 18 21:18:47 2012
@@ -63,6 +63,23 @@ public class ThrottledFetcher
   * can get pulled out of all the right pools and wind up in only the hands of one thread. */
   protected static Integer poolLock = new Integer(0);
 
+  /** Name of the local machine, looked up once in the static initializer below; null if that lookup fails. */
+  private static String currentHost = null;
+  static
+  {
+    // Look up the local host name once, when this class is initialized
+    try
+    {
+      java.net.InetAddress addr = java.net.InetAddress.getLocalHost();
+
+      // Keep only the host name of the local address
+      currentHost = addr.getHostName();
+    }
+    catch (java.net.UnknownHostException e)
+    { // NOTE(review): exception is swallowed here, so currentHost stays null when the local host cannot be resolved
+    }
+  }
+
   /** The read chunk length */
   protected static final int READ_CHUNK_LENGTH = 4096;
 
@@ -1278,7 +1295,8 @@ public class ThrottledFetcher
     */
     public void executeFetch(String urlPath, String userAgent, String from, int connectionTimeoutMilliseconds,
       int socketTimeoutMilliseconds, boolean redirectOK, String host, FormData formData,
-      LoginCookies loginCookies)
+      LoginCookies loginCookies,
+      String proxyHost, int proxyPort, String proxyAuthDomain, String proxyAuthUsername, String proxyAuthPassword)
       throws ManifoldCFException, ServiceInterruption
     {
       StringBuilder sb = new StringBuilder(protocol);
@@ -1342,6 +1360,22 @@ public class ThrottledFetcher
         // Set up protocol to use
         clientConf.setParams(new HostParams());
         clientConf.setHost(server,port,myFactory.getProtocol(protocol));
+        // If there's a proxy, set that too.
+        if (proxyHost != null && proxyHost.length() > 0)
+        {
+          clientConf.setProxy(proxyHost,proxyPort);
+          if (proxyAuthUsername != null && proxyAuthUsername.length() > 0)
+          {
+            if (proxyAuthPassword == null)
+              proxyAuthPassword = "";
+            if (proxyAuthDomain == null)
+              proxyAuthDomain = "";
+            // Set up NTLM credentials for this fetch too.
+            client.getState().setProxyCredentials(AuthScope.ANY,
+              new NTCredentials(proxyAuthUsername,proxyAuthPassword,currentHost,proxyAuthDomain));
+          }
+        }
+
 
         if (Logging.connectors.isDebugEnabled())
           Logging.connectors.debug("WEB: Got an HttpClient object after "+new Long(System.currentTimeMillis()-startTime).toString()+" ms.");

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java?rev=1351490&r1=1351489&r2=1351490&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java Mon Jun 18 21:18:47 2012
@@ -46,6 +46,16 @@ public class WebcrawlerConfig
   public static final String PARAMETER_ROBOTSUSAGE = "Robots usage";
   /** Email (a parameter) */
   public static final String PARAMETER_EMAIL = "Email address";
+  /** Proxy host name (parameter) */
+  public static final String PARAMETER_PROXYHOST = "Proxy host";
+  /** Proxy port (parameter) */
+  public static final String PARAMETER_PROXYPORT = "Proxy port";
+  /** Proxy auth domain (parameter) */
+  public static final String PARAMETER_PROXYAUTHDOMAIN = "Proxy authentication domain";
+  /** Proxy auth username (parameter) */
+  public static final String PARAMETER_PROXYAUTHUSERNAME = "Proxy authentication user name";
+  /** Proxy auth password (parameter) */
+  public static final String PARAMETER_PROXYAUTHPASSWORD = "Proxy authentication password";
   /** The bin description node */
   public static final String NODE_BINDESC = "bindesc";
   /** The bin regular expression */

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java?rev=1351490&r1=1351489&r2=1351490&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java Mon Jun 18 21:18:47 2012
@@ -187,6 +187,21 @@ public class WebcrawlerConnector extends
   /** This is where we keep data around between the getVersions() phase and the processDocuments() phase. */
   protected static DataCache cache = new DataCache();
 
+  /** Proxy host */
+  protected String proxyHost = null;
+  
+  /** Proxy port */
+  protected int proxyPort = -1;
+  
+  /** Proxy auth domain */
+  protected String proxyAuthDomain = null;
+  
+  /** Proxy auth user name */
+  protected String proxyAuthUsername = null;
+  
+  /** Proxy auth password */
+  protected String proxyAuthPassword = null;
+  
   /** Deny access token for default authority */
   private final static String defaultAuthorityDenyToken = "DEAD_AUTHORITY";
 
@@ -362,6 +377,25 @@ public class WebcrawlerConnector extends
       credentialsDescription = new CredentialsDescription(params);
       trustsDescription = new TrustsDescription(params);
 
+      proxyHost = params.getParameter(WebcrawlerConfig.PARAMETER_PROXYHOST);
+      String proxyPortString = params.getParameter(WebcrawlerConfig.PARAMETER_PROXYPORT);
+      proxyAuthDomain = params.getParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHDOMAIN);
+      proxyAuthUsername = params.getParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHUSERNAME);
+      proxyAuthPassword = params.getObfuscatedParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHPASSWORD);
+
+      proxyPort = -1;
+      if (proxyPortString != null && proxyPortString.length() > 0)
+      {
+        try
+        {
+          proxyPort = Integer.parseInt(proxyPortString);
+        }
+        catch (NumberFormatException e)
+        {
+          throw new ManifoldCFException(e.getMessage(),e);
+        }
+      }
+
       isInitialized = true;
     }
   }
@@ -400,6 +434,12 @@ public class WebcrawlerConnector extends
     trustsDescription = null;
     userAgent = null;
     from = null;
+    proxyHost = null;
+    proxyPort = -1;
+    proxyAuthDomain = null;
+    proxyAuthUsername = null;
+    proxyAuthPassword = null;
+
     isInitialized = false;
 
     super.disconnect();
@@ -655,7 +695,7 @@ public class WebcrawlerConnector extends
                 // Check robots, if enabled, and if we're fetching the primary document identifier.  See comment above.
                 int robotsStatus = RESULTSTATUS_TRUE;
                 if (!documentIdentifier.equals(currentURI) || robotsUsage < ROBOTS_DATA || (robotsStatus = checkFetchAllowed(documentIdentifier,protocol,ipAddress,port,credential,trustStore,hostName,binNames,currentTime,
-                  url.getFile(),activities,connectionLimit)) == RESULTSTATUS_TRUE)
+                  url.getFile(),activities,connectionLimit,proxyHost,proxyPort,proxyAuthDomain,proxyAuthUsername,proxyAuthPassword)) == RESULTSTATUS_TRUE)
                 {
                   // Passed the robots check!
 
@@ -679,7 +719,8 @@ public class WebcrawlerConnector extends
 
                       // Execute the fetch!
                       connection.executeFetch(url.getFile(),userAgent,from,connectionTimeoutMilliseconds,
-                        socketTimeoutMilliseconds,false,hostName,formData,lc);
+                        socketTimeoutMilliseconds,false,hostName,formData,lc,
+                        proxyHost,proxyPort,proxyAuthDomain,proxyAuthUsername,proxyAuthPassword);
                       int response = connection.getResponseCode();
 
                       if (response == 200 || response == 302 || response == 301)
@@ -1430,6 +1471,8 @@ public class WebcrawlerConnector extends
     tabsArray.add(Messages.getString(locale,"WebcrawlerConnector.Bandwidth"));
     tabsArray.add(Messages.getString(locale,"WebcrawlerConnector.AccessCredentials"));
     tabsArray.add(Messages.getString(locale,"WebcrawlerConnector.Certificates"));
+    tabsArray.add(Messages.getString(locale,"WebcrawlerConnector.Proxy"));
+
     out.print(
 "<script type=\"text/javascript\">\n"+
 "<!--\n"+
@@ -1759,12 +1802,67 @@ public class WebcrawlerConnector extends
     throws ManifoldCFException, IOException
   {
     
-    String email = parameters.getParameter(org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConfig.PARAMETER_EMAIL);
+    String email = parameters.getParameter(WebcrawlerConfig.PARAMETER_EMAIL);
     if (email == null)
       email = "";
-    String robotsUsage = parameters.getParameter(org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConfig.PARAMETER_ROBOTSUSAGE);
+    String robotsUsage = parameters.getParameter(WebcrawlerConfig.PARAMETER_ROBOTSUSAGE);
     if (robotsUsage == null)
       robotsUsage = "all";
+    String proxyHost = parameters.getParameter(WebcrawlerConfig.PARAMETER_PROXYHOST);
+    if (proxyHost == null)
+      proxyHost = "";
+    String proxyPort = parameters.getParameter(WebcrawlerConfig.PARAMETER_PROXYPORT);
+    if (proxyPort == null)
+      proxyPort = "";
+    String proxyAuthDomain = parameters.getParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHDOMAIN);
+    if (proxyAuthDomain == null)
+      proxyAuthDomain = "";
+    String proxyAuthUsername = parameters.getParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHUSERNAME);
+    if (proxyAuthUsername == null)
+      proxyAuthUsername = "";
+    String proxyAuthPassword = parameters.getObfuscatedParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHPASSWORD);
+    if (proxyAuthPassword == null)
+      proxyAuthPassword = "";
+
+    // Proxy tab
+    if (tabName.equals(Messages.getString(locale,"WebcrawlerConnector.Proxy")))
+    {
+      out.print(
+"<table class=\"displaytable\">\n"+
+"  <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"+
+"  <tr>\n"+
+"    <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyHostColon") + "</nobr></td>\n"+
+"    <td class=\"value\"><input type=\"text\" size=\"40\" name=\"proxyhost\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyHost)+"\"/></td>\n"+
+"  </tr>\n"+
+"  <tr>\n"+
+"    <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyPortColon") + "</nobr></td>\n"+
+"    <td class=\"value\"><input type=\"text\" size=\"5\" name=\"proxyport\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyPort)+"\"/></td>\n"+
+"  </tr>\n"+
+"  <tr>\n"+
+"    <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyAuthenticationDomainColon") + "</nobr></td>\n"+
+"    <td class=\"value\"><input type=\"text\" size=\"32\" name=\"proxyauthdomain\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthDomain)+"\"/></td>\n"+
+"  </tr>\n"+
+"  <tr>\n"+
+"    <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyAuthenticationUserNameColon") + "</nobr></td>\n"+
+"    <td class=\"value\"><input type=\"text\" size=\"32\" name=\"proxyauthusername\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthUsername)+"\"/></td>\n"+
+"  </tr>\n"+
+"  <tr>\n"+
+"    <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyAuthenticationPasswordColon") + "</nobr></td>\n"+
+"    <td class=\"value\"><input type=\"password\" size=\"16\" name=\"proxyauthpassword\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthPassword)+"\"/></td>\n"+
+"  </tr>\n"+
+"</table>\n"
+      );
+    }
+    else
+    {
+      out.print(
+"<input type=\"hidden\" name=\"proxyhost\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyHost)+"\"/>\n"+
+"<input type=\"hidden\" name=\"proxyport\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyPort)+"\"/>\n"+
+"<input type=\"hidden\" name=\"proxyauthusername\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthUsername)+"\"/>\n"+
+"<input type=\"hidden\" name=\"proxyauthdomain\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthDomain)+"\"/>\n"+
+"<input type=\"hidden\" name=\"proxyauthpassword\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthPassword)+"\"/>\n"
+      );
+    }
 
     // Email tab
     if (tabName.equals(Messages.getString(locale,"WebcrawlerConnector.Email")))
@@ -2681,10 +2779,25 @@ public class WebcrawlerConnector extends
   {
     String email = variableContext.getParameter("email");
     if (email != null)
-      parameters.setParameter(org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConfig.PARAMETER_EMAIL,email);
+      parameters.setParameter(WebcrawlerConfig.PARAMETER_EMAIL,email);
     String robotsUsage = variableContext.getParameter("robotsusage");
     if (robotsUsage != null)
-      parameters.setParameter(org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConfig.PARAMETER_ROBOTSUSAGE,robotsUsage);
+      parameters.setParameter(WebcrawlerConfig.PARAMETER_ROBOTSUSAGE,robotsUsage);
+    String proxyHost = variableContext.getParameter("proxyhost");
+    if (proxyHost != null)
+      parameters.setParameter(WebcrawlerConfig.PARAMETER_PROXYHOST,proxyHost);
+    String proxyPort = variableContext.getParameter("proxyport");
+    if (proxyPort != null)
+      parameters.setParameter(WebcrawlerConfig.PARAMETER_PROXYPORT,proxyPort);
+    String proxyAuthDomain = variableContext.getParameter("proxyauthdomain");
+    if (proxyAuthDomain != null)
+      parameters.setParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHDOMAIN,proxyAuthDomain);
+    String proxyAuthUsername = variableContext.getParameter("proxyauthusername");
+    if (proxyAuthUsername != null)
+      parameters.setParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHUSERNAME,proxyAuthUsername);
+    String proxyAuthPassword = variableContext.getParameter("proxyauthpassword");
+    if (proxyAuthPassword != null)
+      parameters.setObfuscatedParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHPASSWORD,proxyAuthPassword);
 
     String x = variableContext.getParameter("bandwidth_count");
     if (x != null && x.length() > 0)
@@ -3056,14 +3169,27 @@ public class WebcrawlerConnector extends
     Locale locale, ConfigParams parameters)
     throws ManifoldCFException, IOException
   {
-    String email = parameters.getParameter(org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConfig.PARAMETER_EMAIL);
-    String robots = parameters.getParameter(org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConfig.PARAMETER_ROBOTSUSAGE);
+    String email = parameters.getParameter(WebcrawlerConfig.PARAMETER_EMAIL);
+    String robots = parameters.getParameter(WebcrawlerConfig.PARAMETER_ROBOTSUSAGE);
     if (robots.equals("none"))
       robots = "Ignore robots.txt";
     else if (robots.equals("data"))
       robots = "Obey robots.txt for data fetches only";
     else if (robots.equals("all"))
       robots = "Obey robots.txt for all fetches";
+    String proxyHost = parameters.getParameter(WebcrawlerConfig.PARAMETER_PROXYHOST);
+    if (proxyHost == null)
+      proxyHost = "";
+    String proxyPort = parameters.getParameter(WebcrawlerConfig.PARAMETER_PROXYPORT);
+    if (proxyPort == null)
+      proxyPort = "";
+    String proxyAuthDomain = parameters.getParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHDOMAIN);
+    if (proxyAuthDomain == null)
+      proxyAuthDomain = "";
+    String proxyAuthUsername = parameters.getParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHUSERNAME);
+    if (proxyAuthUsername == null)
+      proxyAuthUsername = "";
+
     out.print(
 "<table class=\"displaytable\">\n"+
 "  <tr>\n"+
@@ -3073,6 +3199,18 @@ public class WebcrawlerConnector extends
 "    <td class=\"value\" colspan=\"1\"><nobr>"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(robots)+"</nobr></td>\n"+
 "  </tr>\n"+
 "  <tr>\n"+
+"    <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyHostColon") + "</nobr></td>\n"+
+"    <td class=\"value\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(proxyHost)+"</td>\n"+
+"    <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyPortColon") + "</nobr></td>\n"+
+"    <td class=\"value\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(proxyPort)+"</td>\n"+
+"  </tr>\n"+
+"  <tr>\n"+
+"    <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyAuthenticationDomainColon") + "</nobr></td>\n"+
+"    <td class=\"value\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(proxyAuthDomain)+"</td>\n"+
+"    <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyAuthenticationUserNameColon") + "</nobr></td>\n"+
+"    <td class=\"value\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(proxyAuthUsername)+"</td>\n"+
+"  </tr>\n"+
+"  <tr>\n"+
 "    <td class=\"description\" colspan=\"1\"><nobr>"+Messages.getBodyString(locale,"WebcrawlerConnector.BandwidthThrottling")+"</nobr></td>\n"+
 "    <td class=\"boxcell\" colspan=\"3\">\n"+
 "      <table class=\"formtable\">\n"+
@@ -4883,7 +5021,8 @@ public class WebcrawlerConnector extends
   *@return appropriate resultstatus code.
   */
   protected int checkFetchAllowed(String documentIdentifier, String protocol, String hostIPAddress, int port, PageCredentials credential,
-    IKeystoreManager trustStore, String hostName, String[] binNames, long currentTime, String pathString, IVersionActivity versionActivities, int connectionLimit)
+    IKeystoreManager trustStore, String hostName, String[] binNames, long currentTime, String pathString, IVersionActivity versionActivities, int connectionLimit,
+    String proxyHost, int proxyPort, String proxyAuthDomain, String proxyAuthUsername, String proxyAuthPassword)
     throws ManifoldCFException, ServiceInterruption
   {
     // hostNameAndPort is the key for looking up the robots file in the database
@@ -4917,7 +5056,8 @@ public class WebcrawlerConnector extends
           connection.beginFetch(FETCH_ROBOTS);
           try
           {
-            connection.executeFetch("/robots.txt",userAgent,from,connectionTimeoutMilliseconds,socketTimeoutMilliseconds,true,hostName,null,null);
+            connection.executeFetch("/robots.txt",userAgent,from,connectionTimeoutMilliseconds,socketTimeoutMilliseconds,true,hostName,null,null,
+              proxyHost,proxyPort,proxyAuthDomain,proxyAuthUsername,proxyAuthPassword);
             long expirationTime = currentTime+1000*60*60*24;
             int code = connection.getResponseCode();
             if (code == 200)

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties?rev=1351490&r1=1351489&r2=1351490&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties Mon Jun 18 21:18:47 2012
@@ -18,6 +18,12 @@ WebcrawlerConnector.Robots=Robots
 WebcrawlerConnector.Bandwidth=Bandwidth
 WebcrawlerConnector.AccessCredentials=Access Credentials
 WebcrawlerConnector.Certificates=Certificates
+WebcrawlerConnector.Proxy=Proxy
+WebcrawlerConnector.ProxyHostColon=Proxy host:
+WebcrawlerConnector.ProxyPortColon=Proxy port:
+WebcrawlerConnector.ProxyAuthenticationDomainColon=Proxy authentication domain:
+WebcrawlerConnector.ProxyAuthenticationUserNameColon=Proxy authentication user name:
+WebcrawlerConnector.ProxyAuthenticationPasswordColon=Proxy authentication password:
 WebcrawlerConnector.EmailAddressToContact=Email address to contact:
 WebcrawlerConnector.RobotsTxtUsage=Robots.txt usage:
 WebcrawlerConnector.DontLookAtRobotsTxt=Don't look at robots.txt

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties?rev=1351490&r1=1351489&r2=1351490&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties Mon Jun 18 21:18:47 2012
@@ -18,6 +18,12 @@ WebcrawlerConnector.Robots=ロボット
 WebcrawlerConnector.Bandwidth=バンド幅
 WebcrawlerConnector.AccessCredentials=アクセス証明
 WebcrawlerConnector.Certificates=証明証
+WebcrawlerConnector.Proxy=プロキシ
+WebcrawlerConnector.ProxyHostColon=プロキシホスト:
+WebcrawlerConnector.ProxyPortColon=プロキシポート:
+WebcrawlerConnector.ProxyAuthenticationDomainColon=プロキシ認証ドメイン:
+WebcrawlerConnector.ProxyAuthenticationUserNameColon=プロキシ認証ユーザ名:
+WebcrawlerConnector.ProxyAuthenticationPasswordColon=プロキシ認証パスワード:
 WebcrawlerConnector.EmailAddressToContact=連絡先メールアドレス:
 WebcrawlerConnector.RobotsTxtUsage=Robots.txt:
 WebcrawlerConnector.DontLookAtRobotsTxt=robots.txtを利用しない