You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2012/06/18 23:18:48 UTC
svn commit: r1351490 - in /manifoldcf/trunk: ./
connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/
connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/
Author: kwright
Date: Mon Jun 18 21:18:47 2012
New Revision: 1351490
URL: http://svn.apache.org/viewvc?rev=1351490&view=rev
Log:
Add proxy support to Web Connector. Fix for CONNECTORS-483.
Modified:
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/IThrottledConnection.java
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties
Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1351490&r1=1351489&r2=1351490&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Mon Jun 18 21:18:47 2012
@@ -3,6 +3,9 @@ $Id$
======================= 0.6-dev =====================
+CONNECTORS-483: Add NTLM proxy support for Web Connector.
+(Karl Wright)
+
CONNECTORS-482: Need to include at least a portion of the HTTP
body in history message whenever a non-200 HTTP code comes back.
(Karl Wright)
Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/IThrottledConnection.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/IThrottledConnection.java?rev=1351490&r1=1351489&r2=1351490&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/IThrottledConnection.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/IThrottledConnection.java Mon Jun 18 21:18:47 2012
@@ -64,7 +64,8 @@ public interface IThrottledConnection
*/
public void executeFetch(String urlPath, String userAgent, String from, int connectionTimeoutMilliseconds,
int socketTimeoutMilliseconds, boolean redirectOK, String host, FormData formData,
- LoginCookies loginCookies)
+ LoginCookies loginCookies,
+ String proxyHost, int proxyPort, String proxyAuthDomain, String proxyAuthUsername, String proxyAuthPassword)
throws ManifoldCFException, ServiceInterruption;
/** Get the http response code.
Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java?rev=1351490&r1=1351489&r2=1351490&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java Mon Jun 18 21:18:47 2012
@@ -63,6 +63,23 @@ public class ThrottledFetcher
* can get pulled out of all the right pools and wind up in only the hands of one thread. */
protected static Integer poolLock = new Integer(0);
+ /** Current host name */
+ private static String currentHost = null;
+ static
+ {
+ // Find the current host name
+ try
+ {
+ java.net.InetAddress addr = java.net.InetAddress.getLocalHost();
+
+ // Get hostname
+ currentHost = addr.getHostName();
+ }
+ catch (java.net.UnknownHostException e)
+ {
+ }
+ }
+
/** The read chunk length */
protected static final int READ_CHUNK_LENGTH = 4096;
@@ -1278,7 +1295,8 @@ public class ThrottledFetcher
*/
public void executeFetch(String urlPath, String userAgent, String from, int connectionTimeoutMilliseconds,
int socketTimeoutMilliseconds, boolean redirectOK, String host, FormData formData,
- LoginCookies loginCookies)
+ LoginCookies loginCookies,
+ String proxyHost, int proxyPort, String proxyAuthDomain, String proxyAuthUsername, String proxyAuthPassword)
throws ManifoldCFException, ServiceInterruption
{
StringBuilder sb = new StringBuilder(protocol);
@@ -1342,6 +1360,22 @@ public class ThrottledFetcher
// Set up protocol to use
clientConf.setParams(new HostParams());
clientConf.setHost(server,port,myFactory.getProtocol(protocol));
+ // If there's a proxy, set that too.
+ if (proxyHost != null && proxyHost.length() > 0)
+ {
+ clientConf.setProxy(proxyHost,proxyPort);
+ if (proxyAuthUsername != null && proxyAuthUsername.length() > 0)
+ {
+ if (proxyAuthPassword == null)
+ proxyAuthPassword = "";
+ if (proxyAuthDomain == null)
+ proxyAuthDomain = "";
+ // Set up NTLM credentials for this fetch too.
+ client.getState().setProxyCredentials(AuthScope.ANY,
+ new NTCredentials(proxyAuthUsername,proxyAuthPassword,currentHost,proxyAuthDomain));
+ }
+ }
+
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("WEB: Got an HttpClient object after "+new Long(System.currentTimeMillis()-startTime).toString()+" ms.");
Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java?rev=1351490&r1=1351489&r2=1351490&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java Mon Jun 18 21:18:47 2012
@@ -46,6 +46,16 @@ public class WebcrawlerConfig
public static final String PARAMETER_ROBOTSUSAGE = "Robots usage";
/** Email (a parameter) */
public static final String PARAMETER_EMAIL = "Email address";
+ /** Proxy host name (parameter) */
+ public static final String PARAMETER_PROXYHOST = "Proxy host";
+ /** Proxy port (parameter) */
+ public static final String PARAMETER_PROXYPORT = "Proxy port";
+ /** Proxy auth domain (parameter) */
+ public static final String PARAMETER_PROXYAUTHDOMAIN = "Proxy authentication domain";
+ /** Proxy auth username (parameter) */
+ public static final String PARAMETER_PROXYAUTHUSERNAME = "Proxy authentication user name";
+ /** Proxy auth password (parameter) */
+ public static final String PARAMETER_PROXYAUTHPASSWORD = "Proxy authentication password";
/** The bin description node */
public static final String NODE_BINDESC = "bindesc";
/** The bin regular expression */
Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java?rev=1351490&r1=1351489&r2=1351490&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java Mon Jun 18 21:18:47 2012
@@ -187,6 +187,21 @@ public class WebcrawlerConnector extends
/** This is where we keep data around between the getVersions() phase and the processDocuments() phase. */
protected static DataCache cache = new DataCache();
+ /** Proxy host */
+ protected String proxyHost = null;
+
+ /** Proxy port */
+ protected int proxyPort = -1;
+
+ /** Proxy auth domain */
+ protected String proxyAuthDomain = null;
+
+ /** Proxy auth user name */
+ protected String proxyAuthUsername = null;
+
+ /** Proxy auth password */
+ protected String proxyAuthPassword = null;
+
/** Deny access token for default authority */
private final static String defaultAuthorityDenyToken = "DEAD_AUTHORITY";
@@ -362,6 +377,25 @@ public class WebcrawlerConnector extends
credentialsDescription = new CredentialsDescription(params);
trustsDescription = new TrustsDescription(params);
+ proxyHost = params.getParameter(WebcrawlerConfig.PARAMETER_PROXYHOST);
+ String proxyPortString = params.getParameter(WebcrawlerConfig.PARAMETER_PROXYPORT);
+ proxyAuthDomain = params.getParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHDOMAIN);
+ proxyAuthUsername = params.getParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHUSERNAME);
+ proxyAuthPassword = params.getObfuscatedParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHPASSWORD);
+
+ proxyPort = -1;
+ if (proxyPortString != null && proxyPortString.length() > 0)
+ {
+ try
+ {
+ proxyPort = Integer.parseInt(proxyPortString);
+ }
+ catch (NumberFormatException e)
+ {
+ throw new ManifoldCFException(e.getMessage(),e);
+ }
+ }
+
isInitialized = true;
}
}
@@ -400,6 +434,12 @@ public class WebcrawlerConnector extends
trustsDescription = null;
userAgent = null;
from = null;
+ proxyHost = null;
+ proxyPort = -1;
+ proxyAuthDomain = null;
+ proxyAuthUsername = null;
+ proxyAuthPassword = null;
+
isInitialized = false;
super.disconnect();
@@ -655,7 +695,7 @@ public class WebcrawlerConnector extends
// Check robots, if enabled, and if we're fetching the primary document identifier. See comment above.
int robotsStatus = RESULTSTATUS_TRUE;
if (!documentIdentifier.equals(currentURI) || robotsUsage < ROBOTS_DATA || (robotsStatus = checkFetchAllowed(documentIdentifier,protocol,ipAddress,port,credential,trustStore,hostName,binNames,currentTime,
- url.getFile(),activities,connectionLimit)) == RESULTSTATUS_TRUE)
+ url.getFile(),activities,connectionLimit,proxyHost,proxyPort,proxyAuthDomain,proxyAuthUsername,proxyAuthPassword)) == RESULTSTATUS_TRUE)
{
// Passed the robots check!
@@ -679,7 +719,8 @@ public class WebcrawlerConnector extends
// Execute the fetch!
connection.executeFetch(url.getFile(),userAgent,from,connectionTimeoutMilliseconds,
- socketTimeoutMilliseconds,false,hostName,formData,lc);
+ socketTimeoutMilliseconds,false,hostName,formData,lc,
+ proxyHost,proxyPort,proxyAuthDomain,proxyAuthUsername,proxyAuthPassword);
int response = connection.getResponseCode();
if (response == 200 || response == 302 || response == 301)
@@ -1430,6 +1471,8 @@ public class WebcrawlerConnector extends
tabsArray.add(Messages.getString(locale,"WebcrawlerConnector.Bandwidth"));
tabsArray.add(Messages.getString(locale,"WebcrawlerConnector.AccessCredentials"));
tabsArray.add(Messages.getString(locale,"WebcrawlerConnector.Certificates"));
+ tabsArray.add(Messages.getString(locale,"WebcrawlerConnector.Proxy"));
+
out.print(
"<script type=\"text/javascript\">\n"+
"<!--\n"+
@@ -1759,12 +1802,67 @@ public class WebcrawlerConnector extends
throws ManifoldCFException, IOException
{
- String email = parameters.getParameter(org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConfig.PARAMETER_EMAIL);
+ String email = parameters.getParameter(WebcrawlerConfig.PARAMETER_EMAIL);
if (email == null)
email = "";
- String robotsUsage = parameters.getParameter(org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConfig.PARAMETER_ROBOTSUSAGE);
+ String robotsUsage = parameters.getParameter(WebcrawlerConfig.PARAMETER_ROBOTSUSAGE);
if (robotsUsage == null)
robotsUsage = "all";
+ String proxyHost = parameters.getParameter(WebcrawlerConfig.PARAMETER_PROXYHOST);
+ if (proxyHost == null)
+ proxyHost = "";
+ String proxyPort = parameters.getParameter(WebcrawlerConfig.PARAMETER_PROXYPORT);
+ if (proxyPort == null)
+ proxyPort = "";
+ String proxyAuthDomain = parameters.getParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHDOMAIN);
+ if (proxyAuthDomain == null)
+ proxyAuthDomain = "";
+ String proxyAuthUsername = parameters.getParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHUSERNAME);
+ if (proxyAuthUsername == null)
+ proxyAuthUsername = "";
+ String proxyAuthPassword = parameters.getObfuscatedParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHPASSWORD);
+ if (proxyAuthPassword == null)
+ proxyAuthPassword = "";
+
+ // Proxy tab
+ if (tabName.equals(Messages.getString(locale,"WebcrawlerConnector.Proxy")))
+ {
+ out.print(
+"<table class=\"displaytable\">\n"+
+" <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"+
+" <tr>\n"+
+" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyHostColon") + "</nobr></td>\n"+
+" <td class=\"value\"><input type=\"text\" size=\"40\" name=\"proxyhost\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyHost)+"\"/></td>\n"+
+" </tr>\n"+
+" <tr>\n"+
+" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyPortColon") + "</nobr></td>\n"+
+" <td class=\"value\"><input type=\"text\" size=\"5\" name=\"proxyport\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyPort)+"\"/></td>\n"+
+" </tr>\n"+
+" <tr>\n"+
+" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyAuthenticationDomainColon") + "</nobr></td>\n"+
+" <td class=\"value\"><input type=\"text\" size=\"32\" name=\"proxyauthdomain\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthDomain)+"\"/></td>\n"+
+" </tr>\n"+
+" <tr>\n"+
+" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyAuthenticationUserNameColon") + "</nobr></td>\n"+
+" <td class=\"value\"><input type=\"text\" size=\"32\" name=\"proxyauthusername\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthUsername)+"\"/></td>\n"+
+" </tr>\n"+
+" <tr>\n"+
+" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyAuthenticationPasswordColon") + "</nobr></td>\n"+
+" <td class=\"value\"><input type=\"password\" size=\"16\" name=\"proxyauthpassword\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthPassword)+"\"/></td>\n"+
+" </tr>\n"+
+"</table>\n"
+ );
+ }
+ else
+ {
+ out.print(
+"<input type=\"hidden\" name=\"proxyhost\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyHost)+"\"/>\n"+
+"<input type=\"hidden\" name=\"proxyport\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyPort)+"\"/>\n"+
+"<input type=\"hidden\" name=\"proxyauthusername\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthUsername)+"\"/>\n"+
+"<input type=\"hidden\" name=\"proxyauthdomain\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthDomain)+"\"/>\n"+
+"<input type=\"hidden\" name=\"proxyauthpassword\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthPassword)+"\"/>\n"
+ );
+ }
// Email tab
if (tabName.equals(Messages.getString(locale,"WebcrawlerConnector.Email")))
@@ -2681,10 +2779,25 @@ public class WebcrawlerConnector extends
{
String email = variableContext.getParameter("email");
if (email != null)
- parameters.setParameter(org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConfig.PARAMETER_EMAIL,email);
+ parameters.setParameter(WebcrawlerConfig.PARAMETER_EMAIL,email);
String robotsUsage = variableContext.getParameter("robotsusage");
if (robotsUsage != null)
- parameters.setParameter(org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConfig.PARAMETER_ROBOTSUSAGE,robotsUsage);
+ parameters.setParameter(WebcrawlerConfig.PARAMETER_ROBOTSUSAGE,robotsUsage);
+ String proxyHost = variableContext.getParameter("proxyhost");
+ if (proxyHost != null)
+ parameters.setParameter(WebcrawlerConfig.PARAMETER_PROXYHOST,proxyHost);
+ String proxyPort = variableContext.getParameter("proxyport");
+ if (proxyPort != null)
+ parameters.setParameter(WebcrawlerConfig.PARAMETER_PROXYPORT,proxyPort);
+ String proxyAuthDomain = variableContext.getParameter("proxyauthdomain");
+ if (proxyAuthDomain != null)
+ parameters.setParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHDOMAIN,proxyAuthDomain);
+ String proxyAuthUsername = variableContext.getParameter("proxyauthusername");
+ if (proxyAuthUsername != null)
+ parameters.setParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHUSERNAME,proxyAuthUsername);
+ String proxyAuthPassword = variableContext.getParameter("proxyauthpassword");
+ if (proxyAuthPassword != null)
+ parameters.setObfuscatedParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHPASSWORD,proxyAuthPassword);
String x = variableContext.getParameter("bandwidth_count");
if (x != null && x.length() > 0)
@@ -3056,14 +3169,27 @@ public class WebcrawlerConnector extends
Locale locale, ConfigParams parameters)
throws ManifoldCFException, IOException
{
- String email = parameters.getParameter(org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConfig.PARAMETER_EMAIL);
- String robots = parameters.getParameter(org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConfig.PARAMETER_ROBOTSUSAGE);
+ String email = parameters.getParameter(WebcrawlerConfig.PARAMETER_EMAIL);
+ String robots = parameters.getParameter(WebcrawlerConfig.PARAMETER_ROBOTSUSAGE);
if (robots.equals("none"))
robots = "Ignore robots.txt";
else if (robots.equals("data"))
robots = "Obey robots.txt for data fetches only";
else if (robots.equals("all"))
robots = "Obey robots.txt for all fetches";
+ String proxyHost = parameters.getParameter(WebcrawlerConfig.PARAMETER_PROXYHOST);
+ if (proxyHost == null)
+ proxyHost = "";
+ String proxyPort = parameters.getParameter(WebcrawlerConfig.PARAMETER_PROXYPORT);
+ if (proxyPort == null)
+ proxyPort = "";
+ String proxyAuthDomain = parameters.getParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHDOMAIN);
+ if (proxyAuthDomain == null)
+ proxyAuthDomain = "";
+ String proxyAuthUsername = parameters.getParameter(WebcrawlerConfig.PARAMETER_PROXYAUTHUSERNAME);
+ if (proxyAuthUsername == null)
+ proxyAuthUsername = "";
+
out.print(
"<table class=\"displaytable\">\n"+
" <tr>\n"+
@@ -3073,6 +3199,18 @@ public class WebcrawlerConnector extends
" <td class=\"value\" colspan=\"1\"><nobr>"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(robots)+"</nobr></td>\n"+
" </tr>\n"+
" <tr>\n"+
+" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyHostColon") + "</nobr></td>\n"+
+" <td class=\"value\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(proxyHost)+"</td>\n"+
+" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyPortColon") + "</nobr></td>\n"+
+" <td class=\"value\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(proxyPort)+"</td>\n"+
+" </tr>\n"+
+" <tr>\n"+
+" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyAuthenticationDomainColon") + "</nobr></td>\n"+
+" <td class=\"value\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(proxyAuthDomain)+"</td>\n"+
+" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"WebcrawlerConnector.ProxyAuthenticationUserNameColon") + "</nobr></td>\n"+
+" <td class=\"value\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(proxyAuthUsername)+"</td>\n"+
+" </tr>\n"+
+" <tr>\n"+
" <td class=\"description\" colspan=\"1\"><nobr>"+Messages.getBodyString(locale,"WebcrawlerConnector.BandwidthThrottling")+"</nobr></td>\n"+
" <td class=\"boxcell\" colspan=\"3\">\n"+
" <table class=\"formtable\">\n"+
@@ -4883,7 +5021,8 @@ public class WebcrawlerConnector extends
*@return appropriate resultstatus code.
*/
protected int checkFetchAllowed(String documentIdentifier, String protocol, String hostIPAddress, int port, PageCredentials credential,
- IKeystoreManager trustStore, String hostName, String[] binNames, long currentTime, String pathString, IVersionActivity versionActivities, int connectionLimit)
+ IKeystoreManager trustStore, String hostName, String[] binNames, long currentTime, String pathString, IVersionActivity versionActivities, int connectionLimit,
+ String proxyHost, int proxyPort, String proxyAuthDomain, String proxyAuthUsername, String proxyAuthPassword)
throws ManifoldCFException, ServiceInterruption
{
// hostNameAndPort is the key for looking up the robots file in the database
@@ -4917,7 +5056,8 @@ public class WebcrawlerConnector extends
connection.beginFetch(FETCH_ROBOTS);
try
{
- connection.executeFetch("/robots.txt",userAgent,from,connectionTimeoutMilliseconds,socketTimeoutMilliseconds,true,hostName,null,null);
+ connection.executeFetch("/robots.txt",userAgent,from,connectionTimeoutMilliseconds,socketTimeoutMilliseconds,true,hostName,null,null,
+ proxyHost,proxyPort,proxyAuthDomain,proxyAuthUsername,proxyAuthPassword);
long expirationTime = currentTime+1000*60*60*24;
int code = connection.getResponseCode();
if (code == 200)
Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties?rev=1351490&r1=1351489&r2=1351490&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties Mon Jun 18 21:18:47 2012
@@ -18,6 +18,12 @@ WebcrawlerConnector.Robots=Robots
WebcrawlerConnector.Bandwidth=Bandwidth
WebcrawlerConnector.AccessCredentials=Access Credentials
WebcrawlerConnector.Certificates=Certificates
+WebcrawlerConnector.Proxy=Proxy
+WebcrawlerConnector.ProxyHostColon=Proxy host:
+WebcrawlerConnector.ProxyPortColon=Proxy port:
+WebcrawlerConnector.ProxyAuthenticationDomainColon=Proxy authentication domain:
+WebcrawlerConnector.ProxyAuthenticationUserNameColon=Proxy authentication user name:
+WebcrawlerConnector.ProxyAuthenticationPasswordColon=Proxy authentication password:
WebcrawlerConnector.EmailAddressToContact=Email address to contact:
WebcrawlerConnector.RobotsTxtUsage=Robots.txt usage:
WebcrawlerConnector.DontLookAtRobotsTxt=Don't look at robots.txt
Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties?rev=1351490&r1=1351489&r2=1351490&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties Mon Jun 18 21:18:47 2012
@@ -18,6 +18,12 @@ WebcrawlerConnector.Robots=ãã�
WebcrawlerConnector.Bandwidth=バンド幅
WebcrawlerConnector.AccessCredentials=アクセス証明
WebcrawlerConnector.Certificates=証æ証
+WebcrawlerConnector.Proxy=プロキシ
+WebcrawlerConnector.ProxyHostColon=プロキシホスト：
+WebcrawlerConnector.ProxyPortColon=プロキシポート：
+WebcrawlerConnector.ProxyAuthenticationDomainColon=プロキシ認証ドメイン：
+WebcrawlerConnector.ProxyAuthenticationUserNameColon=プロキシ認証ユーザ名：
+WebcrawlerConnector.ProxyAuthenticationPasswordColon=プロキシ認証パスワード：
WebcrawlerConnector.EmailAddressToContact=連絡先メールアドレス：
WebcrawlerConnector.RobotsTxtUsage=Robots.txt：
WebcrawlerConnector.DontLookAtRobotsTxt=robots.txtを利用しない