You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2015/01/09 07:34:37 UTC

svn commit: r1650447 [21/25] - in /nutch/branches/2.x: ./ src/java/org/apache/nutch/api/ src/java/org/apache/nutch/api/impl/ src/java/org/apache/nutch/api/impl/db/ src/java/org/apache/nutch/api/model/response/ src/java/org/apache/nutch/api/resources/ s...

Modified: nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java Fri Jan  9 06:34:33 2015
@@ -51,18 +51,17 @@ public class HttpResponse implements Res
   private final Metadata headers = new SpellCheckedMetadata();
 
   protected enum Scheme {
-    HTTP,
-    HTTPS,
+    HTTP, HTTPS,
   }
 
   public HttpResponse(HttpBase http, URL url, WebPage page)
-  throws ProtocolException, IOException {
+      throws ProtocolException, IOException {
 
     this.http = http;
     this.url = url;
 
     Scheme scheme = null;
- 
+
     if ("http".equals(url.getProtocol())) {
       scheme = Scheme.HTTP;
     } else if ("https".equals(url.getProtocol())) {
@@ -90,50 +89,56 @@ public class HttpResponse implements Res
       } else {
         port = 443;
       }
-      portString= "";
+      portString = "";
     } else {
-      port= url.getPort();
-      portString= ":" + port;
+      port = url.getPort();
+      portString = ":" + port;
     }
     Socket socket = null;
 
     try {
-      socket = new Socket();                    // create the socket
+      socket = new Socket(); // create the socket
       socket.setSoTimeout(http.getTimeout());
 
-
       // connect
       String sockHost = http.useProxy() ? http.getProxyHost() : host;
       int sockPort = http.useProxy() ? http.getProxyPort() : port;
-      InetSocketAddress sockAddr= new InetSocketAddress(sockHost, sockPort);
+      InetSocketAddress sockAddr = new InetSocketAddress(sockHost, sockPort);
       socket.connect(sockAddr, http.getTimeout());
-      
+
       if (scheme == Scheme.HTTPS) {
-        SSLSocketFactory factory = (SSLSocketFactory)SSLSocketFactory.getDefault();
-        SSLSocket sslsocket = (SSLSocket)factory.createSocket(socket, sockHost, sockPort, true);
+        SSLSocketFactory factory = (SSLSocketFactory) SSLSocketFactory
+            .getDefault();
+        SSLSocket sslsocket = (SSLSocket) factory.createSocket(socket,
+            sockHost, sockPort, true);
         sslsocket.setUseClientMode(true);
-        
-        // Get the protocols and ciphers supported by this JVM    
-        Set<String> protocols = new HashSet<String>(Arrays.asList(sslsocket.getSupportedProtocols()));
-        Set<String> ciphers = new HashSet<String>(Arrays.asList(sslsocket.getSupportedCipherSuites()));
-        
+
+        // Get the protocols and ciphers supported by this JVM
+        Set<String> protocols = new HashSet<String>(Arrays.asList(sslsocket
+            .getSupportedProtocols()));
+        Set<String> ciphers = new HashSet<String>(Arrays.asList(sslsocket
+            .getSupportedCipherSuites()));
+
         // Intersect with preferred protocols and ciphers
         protocols.retainAll(http.getTlsPreferredProtocols());
         ciphers.retainAll(http.getTlsPreferredCipherSuites());
-        
-        sslsocket.setEnabledProtocols(protocols.toArray(new String[protocols.size()]));
-        sslsocket.setEnabledCipherSuites(ciphers.toArray(new String[ciphers.size()]));
-        
+
+        sslsocket.setEnabledProtocols(protocols.toArray(new String[protocols
+            .size()]));
+        sslsocket.setEnabledCipherSuites(ciphers.toArray(new String[ciphers
+            .size()]));
+
         sslsocket.startHandshake();
         socket = sslsocket;
       }
-      
+
       conf = http.getConf();
       if (sockAddr != null
           && conf.getBoolean("store.ip.address", false) == true) {
-        String ipString = sockAddr.getAddress().getHostAddress(); //get the ip address
+        String ipString = sockAddr.getAddress().getHostAddress(); // get the ip
+                                                                  // address
         page.getMetadata().put(new Utf8("_ip_"),
-          ByteBuffer.wrap(ipString.getBytes()));
+            ByteBuffer.wrap(ipString.getBytes()));
       }
 
       // make request
@@ -141,9 +146,9 @@ public class HttpResponse implements Res
 
       StringBuffer reqStr = new StringBuffer("GET ");
       if (http.useProxy()) {
-      	reqStr.append(url.getProtocol()+"://"+host+portString+path);
+        reqStr.append(url.getProtocol() + "://" + host + portString + path);
       } else {
-      	reqStr.append(path);
+        reqStr.append(path);
       }
 
       reqStr.append(" HTTP/1.0\r\n");
@@ -161,39 +166,40 @@ public class HttpResponse implements Res
 
       String userAgent = http.getUserAgent();
       if ((userAgent == null) || (userAgent.length() == 0)) {
-        if (Http.LOG.isErrorEnabled()) { Http.LOG.error("User-agent is not set!"); }
+        if (Http.LOG.isErrorEnabled()) {
+          Http.LOG.error("User-agent is not set!");
+        }
       } else {
         reqStr.append("User-Agent: ");
         reqStr.append(userAgent);
         reqStr.append("\r\n");
       }
 
-//      if (page.isReadable(WebPage.Field.MODIFIED_TIME.getIndex())) {
-        reqStr.append("If-Modified-Since: " +
-                      HttpDateFormat.toString(page.getModifiedTime()));
-        reqStr.append("\r\n");
-//      }
+      // if (page.isReadable(WebPage.Field.MODIFIED_TIME.getIndex())) {
+      reqStr.append("If-Modified-Since: "
+          + HttpDateFormat.toString(page.getModifiedTime()));
+      reqStr.append("\r\n");
+      // }
       reqStr.append("\r\n");
 
-      byte[] reqBytes= reqStr.toString().getBytes();
+      byte[] reqBytes = reqStr.toString().getBytes();
 
       req.write(reqBytes);
       req.flush();
 
-      PushbackInputStream in =                  // process response
-        new PushbackInputStream(
-          new BufferedInputStream(socket.getInputStream(), Http.BUFFER_SIZE),
-          Http.BUFFER_SIZE) ;
+      PushbackInputStream in = // process response
+      new PushbackInputStream(new BufferedInputStream(socket.getInputStream(),
+          Http.BUFFER_SIZE), Http.BUFFER_SIZE);
 
       StringBuffer line = new StringBuffer();
 
-      boolean haveSeenNonContinueStatus= false;
+      boolean haveSeenNonContinueStatus = false;
       while (!haveSeenNonContinueStatus) {
         // parse status code line
         this.code = parseStatusLine(in, line);
         // parse headers
         parseHeaders(in, line);
-        haveSeenNonContinueStatus= code != 100; // 100 is "Continue"
+        haveSeenNonContinueStatus = code != 100; // 100 is "Continue"
       }
 
       String transferEncoding = getHeader(Response.TRANSFER_ENCODING);
@@ -228,10 +234,10 @@ public class HttpResponse implements Res
 
   }
 
-
-  /* ------------------------- *
-   * <implementation:Response> *
-   * ------------------------- */
+  /*
+   * ------------------------- * <implementation:Response> *
+   * -------------------------
+   */
 
   public URL getUrl() {
     return url;
@@ -253,15 +259,15 @@ public class HttpResponse implements Res
     return content;
   }
 
-  /* ------------------------- *
-   * <implementation:Response> *
-   * ------------------------- */
-
+  /*
+   * ------------------------- * <implementation:Response> *
+   * -------------------------
+   */
 
-  private void readPlainContent(InputStream in)
-    throws HttpException, IOException {
+  private void readPlainContent(InputStream in) throws HttpException,
+      IOException {
 
-    int contentLength = Integer.MAX_VALUE;    // get content length
+    int contentLength = Integer.MAX_VALUE; // get content length
     String contentLengthString = headers.get(Response.CONTENT_LENGTH);
     if (contentLengthString != null) {
       contentLengthString = contentLengthString.trim();
@@ -269,12 +275,13 @@ public class HttpResponse implements Res
         if (!contentLengthString.isEmpty())
           contentLength = Integer.parseInt(contentLengthString);
       } catch (NumberFormatException e) {
-        throw new HttpException("bad content length: "+contentLengthString);
+        throw new HttpException("bad content length: " + contentLengthString);
       }
     }
-    if (http.getMaxContent() >= 0
-      && contentLength > http.getMaxContent())   // limit download size
-      contentLength  = http.getMaxContent();
+    if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) // limit
+                                                                           // download
+                                                                           // size
+      contentLength = http.getMaxContent();
 
     ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE);
     byte[] bytes = new byte[Http.BUFFER_SIZE];
@@ -384,38 +391,37 @@ public class HttpResponse implements Res
     parseHeaders(in, line);
 
   }
-  
+
   private int parseStatusLine(PushbackInputStream in, StringBuffer line)
-    throws IOException, HttpException {
+      throws IOException, HttpException {
     readLine(in, line, false);
 
     int codeStart = line.indexOf(" ");
-    int codeEnd = line.indexOf(" ", codeStart+1);
+    int codeEnd = line.indexOf(" ", codeStart + 1);
 
     // handle lines with no plaintext result code, ie:
     // "HTTP/1.1 200" vs "HTTP/1.1 200 OK"
     if (codeEnd == -1)
-      codeEnd= line.length();
+      codeEnd = line.length();
 
     int code;
     try {
-      code= Integer.parseInt(line.substring(codeStart+1, codeEnd));
+      code = Integer.parseInt(line.substring(codeStart + 1, codeEnd));
     } catch (NumberFormatException e) {
-      throw new HttpException("bad status line '" + line
-                              + "': " + e.getMessage(), e);
+      throw new HttpException("bad status line '" + line + "': "
+          + e.getMessage(), e);
     }
 
     return code;
   }
 
+  private void processHeaderLine(StringBuffer line) throws IOException,
+      HttpException {
 
-  private void processHeaderLine(StringBuffer line)
-    throws IOException, HttpException {
-
-    int colonIndex = line.indexOf(":");       // key is up to colon
+    int colonIndex = line.indexOf(":"); // key is up to colon
     if (colonIndex == -1) {
       int i;
-      for (i= 0; i < line.length(); i++)
+      for (i = 0; i < line.length(); i++)
         if (!Character.isWhitespace(line.charAt(i)))
           break;
       if (i == line.length())
@@ -424,7 +430,7 @@ public class HttpResponse implements Res
     }
     String key = line.substring(0, colonIndex);
 
-    int valueStart = colonIndex+1;            // skip whitespace
+    int valueStart = colonIndex + 1; // skip whitespace
     while (valueStart < line.length()) {
       int c = line.charAt(valueStart);
       if (c != ' ' && c != '\t')
@@ -435,28 +441,27 @@ public class HttpResponse implements Res
     headers.set(key, value);
   }
 
-
   // Adds headers to our headers Metadata
   private void parseHeaders(PushbackInputStream in, StringBuffer line)
-    throws IOException, HttpException {
+      throws IOException, HttpException {
 
     while (readLine(in, line, true) != 0) {
 
       // handle HTTP responses with missing blank line after headers
       int pos;
-      if ( ((pos= line.indexOf("<!DOCTYPE")) != -1)
-           || ((pos= line.indexOf("<HTML")) != -1)
-           || ((pos= line.indexOf("<html")) != -1) ) {
+      if (((pos = line.indexOf("<!DOCTYPE")) != -1)
+          || ((pos = line.indexOf("<HTML")) != -1)
+          || ((pos = line.indexOf("<html")) != -1)) {
 
         in.unread(line.substring(pos).getBytes("UTF-8"));
         line.setLength(pos);
 
         try {
-            //TODO: (CM) We don't know the header names here
-            //since we're just handling them generically. It would
-            //be nice to provide some sort of mapping function here
-            //for the returned header names to the standard metadata
-            //names in the ParseData class
+          // TODO: (CM) We don't know the header names here
+          // since we're just handling them generically. It would
+          // be nice to provide some sort of mapping function here
+          // for the returned header names to the standard metadata
+          // names in the ParseData class
           processHeaderLine(line);
         } catch (Exception e) {
           // fixme:
@@ -470,29 +475,29 @@ public class HttpResponse implements Res
   }
 
   private static int readLine(PushbackInputStream in, StringBuffer line,
-                      boolean allowContinuedLine)
-    throws IOException {
+      boolean allowContinuedLine) throws IOException {
     line.setLength(0);
     for (int c = in.read(); c != -1; c = in.read()) {
       switch (c) {
-        case '\r':
-          if (peek(in) == '\n') {
-            in.read();
-          }
-        case '\n':
-          if (line.length() > 0) {
-            // at EOL -- check for continued line if the current
-            // (possibly continued) line wasn't blank
-            if (allowContinuedLine)
-              switch (peek(in)) {
-                case ' ' : case '\t':                   // line is continued
-                  in.read();
-                  continue;
-              }
-          }
-          return line.length();      // else complete
-        default :
-          line.append((char)c);
+      case '\r':
+        if (peek(in) == '\n') {
+          in.read();
+        }
+      case '\n':
+        if (line.length() > 0) {
+          // at EOL -- check for continued line if the current
+          // (possibly continued) line wasn't blank
+          if (allowContinuedLine)
+            switch (peek(in)) {
+            case ' ':
+            case '\t': // line is continued
+              in.read();
+              continue;
+            }
+        }
+        return line.length(); // else complete
+      default:
+        line.append((char) c);
       }
     }
     throw new EOFException();

Modified: nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java Fri Jan  9 06:34:33 2015
@@ -34,7 +34,7 @@ import org.mortbay.jetty.servlet.Context
 import org.mortbay.jetty.servlet.ServletHolder;
 
 /**
- * Test cases for protocol-http 
+ * Test cases for protocol-http
  */
 public class TestProtocolHttp {
   private static final String RES_DIR = System.getProperty("test.data", ".");
@@ -44,7 +44,7 @@ public class TestProtocolHttp {
   private Context root;
   private Configuration conf;
   private int port;
-  
+
   public void setUp(boolean redirection) throws Exception {
     this.conf = new Configuration();
     this.conf.addResource("nutch-default.xml");
@@ -52,18 +52,18 @@ public class TestProtocolHttp {
 
     this.http = new Http();
     this.http.setConf(conf);
-    
+
     this.server = new Server();
-    
+
     if (redirection) {
       this.root = new Context(server, "/redirection", Context.SESSIONS);
       this.root.setAttribute("newContextURL", "/redirect");
-    } 
-    else {
+    } else {
       this.root = new Context(server, "/", Context.SESSIONS);
     }
 
-    ServletHolder sh = new ServletHolder(org.apache.jasper.servlet.JspServlet.class);
+    ServletHolder sh = new ServletHolder(
+        org.apache.jasper.servlet.JspServlet.class);
     this.root.addServlet(sh, "*.jsp");
     this.root.setResourceBase(RES_DIR);
   }
@@ -89,12 +89,14 @@ public class TestProtocolHttp {
     startServer(47500, true);
     fetchPage("/redirection", 302);
   }
-  
+
   /**
    * Starts the Jetty server at a specified port and redirection parameter.
    * 
-   * @param portno Port number.
-   * @param redirection whether redirection        
+   * @param portno
+   *          Port number.
+   * @param redirection
+   *          whether redirection
    */
   private void startServer(int portno, boolean redirection) throws Exception {
     port = portno;
@@ -123,11 +125,13 @@ public class TestProtocolHttp {
     Response response = http.getResponse(url, p, true);
     ProtocolOutput out = http.getProtocolOutput(url.toString(), p);
     Content content = out.getContent();
-    
-    assertEquals("HTTP Status Code for " + url, expectedCode, response.getCode());
-    if (page.compareTo("/nonexists.html") != 0 
-    		 && page.compareTo("/brokenpage.jsp") != 0
-         && page.compareTo("/redirection") != 0)
-      assertEquals("ContentType " + url, "application/xhtml+xml", content.getContentType());
+
+    assertEquals("HTTP Status Code for " + url, expectedCode,
+        response.getCode());
+    if (page.compareTo("/nonexists.html") != 0
+        && page.compareTo("/brokenpage.jsp") != 0
+        && page.compareTo("/redirection") != 0)
+      assertEquals("ContentType " + url, "application/xhtml+xml",
+          content.getContentType());
   }
 }

Modified: nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java Fri Jan  9 06:34:33 2015
@@ -1,19 +1,19 @@
 /*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements.  See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 /*
  * Based on EasySSLProtocolSocketFactory from commons-httpclient:
  * 
@@ -41,10 +41,12 @@ import org.slf4j.LoggerFactory;
 import javax.net.ssl.SSLContext;
 import javax.net.ssl.TrustManager;
 
-public class DummySSLProtocolSocketFactory implements SecureProtocolSocketFactory {
+public class DummySSLProtocolSocketFactory implements
+    SecureProtocolSocketFactory {
 
   /** Logger object for this class. */
-  private static final Logger LOG = LoggerFactory.getLogger(DummySSLProtocolSocketFactory.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(DummySSLProtocolSocketFactory.class);
 
   private SSLContext sslcontext = null;
 
@@ -58,10 +60,13 @@ public class DummySSLProtocolSocketFacto
   private static SSLContext createEasySSLContext() {
     try {
       SSLContext context = SSLContext.getInstance("SSL");
-      context.init(null, new TrustManager[] { new DummyX509TrustManager(null) }, null);
+      context.init(null,
+          new TrustManager[] { new DummyX509TrustManager(null) }, null);
       return context;
     } catch (Exception e) {
-      if (LOG.isErrorEnabled()) { LOG.error(e.getMessage(), e); }
+      if (LOG.isErrorEnabled()) {
+        LOG.error(e.getMessage(), e);
+      }
       throw new HttpClientError(e.toString());
     }
   }
@@ -76,10 +81,11 @@ public class DummySSLProtocolSocketFacto
   /**
    * @see org.apache.commons.httpclient.protocol.SecureProtocolSocketFactory#createSocket(String,int,InetAddress,int)
    */
-  public Socket createSocket(String host, int port, InetAddress clientHost, int clientPort) throws IOException,
-          UnknownHostException {
+  public Socket createSocket(String host, int port, InetAddress clientHost,
+      int clientPort) throws IOException, UnknownHostException {
 
-    return getSSLContext().getSocketFactory().createSocket(host, port, clientHost, clientPort);
+    return getSSLContext().getSocketFactory().createSocket(host, port,
+        clientHost, clientPort);
   }
 
   /**
@@ -93,20 +99,28 @@ public class DummySSLProtocolSocketFacto
    * throws an {@link ConnectTimeoutException}
    * </p>
    * 
-   * @param host the host name/IP
-   * @param port the port on the host
-   * @param localAddress the local host name/IP to bind the socket to
-   * @param localPort the port on the local machine
-   * @param params {@link HttpConnectionParams Http connection parameters}
+   * @param host
+   *          the host name/IP
+   * @param port
+   *          the port on the host
+   * @param localAddress
+   *          the local host name/IP to bind the socket to
+   * @param localPort
+   *          the port on the local machine
+   * @param params
+   *          {@link HttpConnectionParams Http connection parameters}
    * 
    * @return Socket a new socket
    * 
-   * @throws IOException if an I/O error occurs while creating the socket
-   * @throws UnknownHostException if the IP address of the host cannot be
-   *         determined
+   * @throws IOException
+   *           if an I/O error occurs while creating the socket
+   * @throws UnknownHostException
+   *           if the IP address of the host cannot be determined
    */
-  public Socket createSocket(final String host, final int port, final InetAddress localAddress, final int localPort,
-          final HttpConnectionParams params) throws IOException, UnknownHostException, ConnectTimeoutException {
+  public Socket createSocket(final String host, final int port,
+      final InetAddress localAddress, final int localPort,
+      final HttpConnectionParams params) throws IOException,
+      UnknownHostException, ConnectTimeoutException {
     if (params == null) {
       throw new IllegalArgumentException("Parameters may not be null");
     }
@@ -115,27 +129,31 @@ public class DummySSLProtocolSocketFacto
       return createSocket(host, port, localAddress, localPort);
     } else {
       // To be eventually deprecated when migrated to Java 1.4 or above
-      return ControllerThreadSocketFactory.createSocket(this, host, port, localAddress, localPort, timeout);
+      return ControllerThreadSocketFactory.createSocket(this, host, port,
+          localAddress, localPort, timeout);
     }
   }
 
   /**
    * @see org.apache.commons.httpclient.protocol.SecureProtocolSocketFactory#createSocket(String,int)
    */
-  public Socket createSocket(String host, int port) throws IOException, UnknownHostException {
+  public Socket createSocket(String host, int port) throws IOException,
+      UnknownHostException {
     return getSSLContext().getSocketFactory().createSocket(host, port);
   }
 
   /**
    * @see org.apache.commons.httpclient.protocol.SecureProtocolSocketFactory#createSocket(Socket,String,int,boolean)
    */
-  public Socket createSocket(Socket socket, String host, int port, boolean autoClose) throws IOException,
-          UnknownHostException {
-    return getSSLContext().getSocketFactory().createSocket(socket, host, port, autoClose);
+  public Socket createSocket(Socket socket, String host, int port,
+      boolean autoClose) throws IOException, UnknownHostException {
+    return getSSLContext().getSocketFactory().createSocket(socket, host, port,
+        autoClose);
   }
 
   public boolean equals(Object obj) {
-    return ((obj != null) && obj.getClass().equals(DummySSLProtocolSocketFactory.class));
+    return ((obj != null) && obj.getClass().equals(
+        DummySSLProtocolSocketFactory.class));
   }
 
   public int hashCode() {

Modified: nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummyX509TrustManager.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummyX509TrustManager.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummyX509TrustManager.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummyX509TrustManager.java Fri Jan  9 06:34:33 2015
@@ -1,19 +1,19 @@
 /**
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements.  See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 /*
  * Based on EasyX509TrustManager from commons-httpclient.
  */
@@ -30,53 +30,57 @@ import javax.net.ssl.TrustManagerFactory
 import javax.net.ssl.TrustManager;
 import javax.net.ssl.X509TrustManager;
 
-public class DummyX509TrustManager implements X509TrustManager
-{
-    private X509TrustManager standardTrustManager = null;
-
-    /**
-     * Constructor for DummyX509TrustManager.
-     */
-    public DummyX509TrustManager(KeyStore keystore) throws NoSuchAlgorithmException, KeyStoreException {
-        super();
-        String algo = TrustManagerFactory.getDefaultAlgorithm();
-        TrustManagerFactory factory = TrustManagerFactory.getInstance(algo);
-        factory.init(keystore);
-        TrustManager[] trustmanagers = factory.getTrustManagers();
-        if (trustmanagers.length == 0) {
-            throw new NoSuchAlgorithmException(algo + " trust manager not supported");
-        }
-        this.standardTrustManager = (X509TrustManager)trustmanagers[0];
-    }
-
-    /**
-     * @see javax.net.ssl.X509TrustManager#checkClientTrusted(X509Certificate[], String)
-     */
-    public boolean isClientTrusted(X509Certificate[] certificates) {
-        return true;
-    }
-
-    /**
-     * @see javax.net.ssl.X509TrustManager#checkServerTrusted(X509Certificate[], String)
-     */
-    public boolean isServerTrusted(X509Certificate[] certificates) {
-      return true;
-    }
+public class DummyX509TrustManager implements X509TrustManager {
+  private X509TrustManager standardTrustManager = null;
 
-    /**
-     * @see javax.net.ssl.X509TrustManager#getAcceptedIssuers()
-     */
-    public X509Certificate[] getAcceptedIssuers() {
-        return this.standardTrustManager.getAcceptedIssuers();
-    }
+  /**
+   * Constructor for DummyX509TrustManager.
+   */
+  public DummyX509TrustManager(KeyStore keystore)
+      throws NoSuchAlgorithmException, KeyStoreException {
+    super();
+    String algo = TrustManagerFactory.getDefaultAlgorithm();
+    TrustManagerFactory factory = TrustManagerFactory.getInstance(algo);
+    factory.init(keystore);
+    TrustManager[] trustmanagers = factory.getTrustManagers();
+    if (trustmanagers.length == 0) {
+      throw new NoSuchAlgorithmException(algo + " trust manager not supported");
+    }
+    this.standardTrustManager = (X509TrustManager) trustmanagers[0];
+  }
+
+  /**
+   * @see javax.net.ssl.X509TrustManager#checkClientTrusted(X509Certificate[],
+   *      String)
+   */
+  public boolean isClientTrusted(X509Certificate[] certificates) {
+    return true;
+  }
+
+  /**
+   * @see javax.net.ssl.X509TrustManager#checkServerTrusted(X509Certificate[],
+   *      String)
+   */
+  public boolean isServerTrusted(X509Certificate[] certificates) {
+    return true;
+  }
+
+  /**
+   * @see javax.net.ssl.X509TrustManager#getAcceptedIssuers()
+   */
+  public X509Certificate[] getAcceptedIssuers() {
+    return this.standardTrustManager.getAcceptedIssuers();
+  }
+
+  public void checkClientTrusted(X509Certificate[] arg0, String arg1)
+      throws CertificateException {
+    // do nothing
+
+  }
+
+  public void checkServerTrusted(X509Certificate[] arg0, String arg1)
+      throws CertificateException {
+    // do nothing
 
-    public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
-      // do nothing
-      
-    }
-
-    public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
-      // do nothing
-      
-    }
+  }
 }

Modified: nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java Fri Jan  9 06:34:33 2015
@@ -67,395 +67,383 @@ import org.apache.nutch.util.NutchConfig
  */
 public class Http extends HttpBase {
 
-	public static final Logger LOG = LoggerFactory.getLogger(Http.class);
+  public static final Logger LOG = LoggerFactory.getLogger(Http.class);
 
-	private static MultiThreadedHttpConnectionManager connectionManager = new MultiThreadedHttpConnectionManager();
+  private static MultiThreadedHttpConnectionManager connectionManager = new MultiThreadedHttpConnectionManager();
 
-	// Since the Configuration has not yet been set,
-	// then an unconfigured client is returned.
-	private static HttpClient client = new HttpClient(connectionManager);
-	private static String defaultUsername;
-	private static String defaultPassword;
-	private static String defaultRealm;
-	private static String defaultScheme;
-	private static String authFile;
-	private static String agentHost;
-	private static boolean authRulesRead = false;
-	private static Configuration conf;
-
-	int maxThreadsTotal = 10;
-
-	private String proxyUsername;
-	private String proxyPassword;
-	private String proxyRealm;
-
-	private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
-
-	static {
-		FIELDS.add(WebPage.Field.MODIFIED_TIME);
-		FIELDS.add(WebPage.Field.HEADERS);
-	}
-
-	@Override
-	public Collection<Field> getFields() {
-		return FIELDS;
-	}
-	
-	/**
-	 * Returns the configured HTTP client.
-	 * 
-	 * @return HTTP client
-	 */
-	static synchronized HttpClient getClient() {
-		return client;
-	}
-
-	/**
-	 * Constructs this plugin.
-	 */
-	public Http() {
-		super(LOG);
-	}
-
-	/**
-	 * Reads the configuration from the Nutch configuration files and sets the
-	 * configuration.
-	 * 
-	 * @param conf
-	 *            Configuration
-	 */
-	public void setConf(Configuration conf) {
-		super.setConf(conf);
-		Http.conf = conf;
-		this.maxThreadsTotal = conf.getInt("fetcher.threads.fetch", 10);
-		this.proxyUsername = conf.get("http.proxy.username", "");
-		this.proxyPassword = conf.get("http.proxy.password", "");
-		this.proxyRealm = conf.get("http.proxy.realm", "");
-		agentHost = conf.get("http.agent.host", "");
-		authFile = conf.get("http.auth.file", "");
-		configureClient();
-		try {
-			setCredentials();
-		} catch (Exception ex) {
-			if (LOG.isErrorEnabled()) {
-				LOG.error("Could not read " + authFile + " : "
-						+ ex.getMessage());
-			}
-		}
-	}
-
-	/**
-	 * Main method.
-	 * 
-	 * @param args
-	 *            Command line arguments
-	 */
-	public static void main(String[] args) throws Exception {
-		Http http = new Http();
-		http.setConf(NutchConfiguration.create());
-		main(http, args);
-	}
-
-	/**
-	 * Fetches the <code>url</code> with a configured HTTP client and gets the
-	 * response.
-	 * 
-	 * @param url
-	 *            URL to be fetched
-	 * @param datum
-	 *            Crawl data
-	 * @param redirect
-	 *            Follow redirects if and only if true
-	 * @return HTTP response
-	 */
-	protected Response getResponse(URL url, WebPage page, boolean redirect)
-			throws ProtocolException, IOException {
-		resolveCredentials(url);
-		return new HttpResponse(this, url, page, redirect);
-	}
-
-	/**
-	 * Configures the HTTP client
-	 */
-	private void configureClient() {
-
-		// Set up an HTTPS socket factory that accepts self-signed certs.
-	  ProtocolSocketFactory factory = new SSLProtocolSocketFactory();
-		Protocol https = new Protocol("https", factory, 443);
-		Protocol.registerProtocol("https", https);
-
-		HttpConnectionManagerParams params = connectionManager.getParams();
-		params.setConnectionTimeout(timeout);
-		params.setSoTimeout(timeout);
-		params.setSendBufferSize(BUFFER_SIZE);
-		params.setReceiveBufferSize(BUFFER_SIZE);
-		params.setMaxTotalConnections(maxThreadsTotal);
-		
-		//Also set max connections per host to maxThreadsTotal since all threads
-		//might be used to fetch from the same host - otherwise timeout errors can occur
-		params.setDefaultMaxConnectionsPerHost(maxThreadsTotal);
-
-		// executeMethod(HttpMethod) seems to ignore the connection timeout on
-		// the connection manager.
-		// set it explicitly on the HttpClient.
-		client.getParams().setConnectionManagerTimeout(timeout);
-
-		HostConfiguration hostConf = client.getHostConfiguration();
-		ArrayList<Header> headers = new ArrayList<Header>();
-		// Set the User Agent in the header
-		headers.add(new Header("User-Agent", userAgent));
-		// prefer English
-		headers.add(new Header("Accept-Language",
-				"en-us,en-gb,en;q=0.7,*;q=0.3"));
-		// prefer UTF-8
-		headers.add(new Header("Accept-Charset",
-				"utf-8,ISO-8859-1;q=0.7,*;q=0.7"));
-		// prefer understandable formats
-		headers.add(new Header(
-				"Accept",
-				"text/html,application/xml;q=0.9,application/xhtml+xml,text/xml;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"));
-		// accept gzipped content
-		headers.add(new Header("Accept-Encoding", "x-gzip, gzip, deflate"));
-		hostConf.getParams().setParameter("http.default-headers", headers);
-
-		// HTTP proxy server details
-		if (useProxy) {
-			hostConf.setProxy(proxyHost, proxyPort);
-
-			if (proxyUsername.length() > 0) {
-
-				AuthScope proxyAuthScope = getAuthScope(this.proxyHost,
-						this.proxyPort, this.proxyRealm);
-
-				NTCredentials proxyCredentials = new NTCredentials(
-						this.proxyUsername, this.proxyPassword, Http.agentHost,
-						this.proxyRealm);
-
-				client.getState().setProxyCredentials(proxyAuthScope,
-						proxyCredentials);
-			}
-		}
-
-	}
-
-	/**
-	 * Reads authentication configuration file (defined as 'http.auth.file' in
-	 * Nutch configuration file) and sets the credentials for the configured
-	 * authentication scopes in the HTTP client object.
-	 * 
-	 * @throws ParserConfigurationException
-	 *             If a document builder can not be created.
-	 * @throws SAXException
-	 *             If any parsing error occurs.
-	 * @throws IOException
-	 *             If any I/O error occurs.
-	 */
-	private static synchronized void setCredentials()
-			throws ParserConfigurationException, SAXException, IOException {
-
-		if (authRulesRead)
-			return;
-
-		authRulesRead = true; // Avoid re-attempting to read
-
-		InputStream is = conf.getConfResourceAsInputStream(authFile);
-		if (is != null) {
-			Document doc = DocumentBuilderFactory.newInstance()
-					.newDocumentBuilder().parse(is);
-
-			Element rootElement = doc.getDocumentElement();
-			if (!"auth-configuration".equals(rootElement.getTagName())) {
-				if (LOG.isWarnEnabled())
-					LOG.warn("Bad auth conf file: root element <"
-							+ rootElement.getTagName() + "> found in "
-							+ authFile + " - must be <auth-configuration>");
-			}
-
-			// For each set of credentials
-			NodeList credList = rootElement.getChildNodes();
-			for (int i = 0; i < credList.getLength(); i++) {
-				Node credNode = credList.item(i);
-				if (!(credNode instanceof Element))
-					continue;
-
-				Element credElement = (Element) credNode;
-				if (!"credentials".equals(credElement.getTagName())) {
-					if (LOG.isWarnEnabled())
-						LOG.warn("Bad auth conf file: Element <"
-								+ credElement.getTagName()
-								+ "> not recognized in " + authFile
-								+ " - expected <credentials>");
-					continue;
-				}
-
-				String username = credElement.getAttribute("username");
-				String password = credElement.getAttribute("password");
-
-				// For each authentication scope
-				NodeList scopeList = credElement.getChildNodes();
-				for (int j = 0; j < scopeList.getLength(); j++) {
-					Node scopeNode = scopeList.item(j);
-					if (!(scopeNode instanceof Element))
-						continue;
-
-					Element scopeElement = (Element) scopeNode;
-
-					if ("default".equals(scopeElement.getTagName())) {
-
-						// Determine realm and scheme, if any
-						String realm = scopeElement.getAttribute("realm");
-						String scheme = scopeElement.getAttribute("scheme");
-
-						// Set default credentials
-						defaultUsername = username;
-						defaultPassword = password;
-						defaultRealm = realm;
-						defaultScheme = scheme;
-
-						if (LOG.isTraceEnabled()) {
-							LOG.trace("Credentials - username: " + username
-									+ "; set as default" + " for realm: "
-									+ realm + "; scheme: " + scheme);
-						}
-
-					} else if ("authscope".equals(scopeElement.getTagName())) {
-
-						// Determine authentication scope details
-						String host = scopeElement.getAttribute("host");
-						int port = -1; // For setting port to AuthScope.ANY_PORT
-						try {
-							port = Integer.parseInt(scopeElement
-									.getAttribute("port"));
-						} catch (Exception ex) {
-							// do nothing, port is already set to any port
-						}
-						String realm = scopeElement.getAttribute("realm");
-						String scheme = scopeElement.getAttribute("scheme");
-
-						// Set credentials for the determined scope
-						AuthScope authScope = getAuthScope(host, port, realm,
-								scheme);
-						NTCredentials credentials = new NTCredentials(username,
-								password, agentHost, realm);
-
-						client.getState()
-								.setCredentials(authScope, credentials);
-
-						if (LOG.isTraceEnabled()) {
-							LOG.trace("Credentials - username: " + username
-									+ "; set for AuthScope - " + "host: "
-									+ host + "; port: " + port + "; realm: "
-									+ realm + "; scheme: " + scheme);
-						}
-
-					} else {
-						if (LOG.isWarnEnabled())
-							LOG.warn("Bad auth conf file: Element <"
-									+ scopeElement.getTagName()
-									+ "> not recognized in " + authFile
-									+ " - expected <authscope>");
-					}
-				}
-				is.close();
-			}
-		}
-	}
-
-	/**
-	 * If credentials for the authentication scope determined from the specified
-	 * <code>url</code> is not already set in the HTTP client, then this method
-	 * sets the default credentials to fetch the specified <code>url</code>. If
-	 * credentials are found for the authentication scope, the method returns
-	 * without altering the client.
-	 * 
-	 * @param url
-	 *            URL to be fetched
-	 */
-	private void resolveCredentials(URL url) {
-
-		if (defaultUsername != null && defaultUsername.length() > 0) {
-
-			int port = url.getPort();
-			if (port == -1) {
-				if ("https".equals(url.getProtocol()))
-					port = 443;
-				else
-					port = 80;
-			}
-
-			AuthScope scope = new AuthScope(url.getHost(), port);
-
-			if (client.getState().getCredentials(scope) != null) {
-				if (LOG.isTraceEnabled())
-					LOG.trace("Pre-configured credentials with scope - host: "
-							+ url.getHost() + "; port: " + port
-							+ "; found for url: " + url);
-
-				// Credentials are already configured, so do nothing and return
-				return;
-			}
-
-			if (LOG.isTraceEnabled())
-				LOG.trace("Pre-configured credentials with scope -  host: "
-						+ url.getHost() + "; port: " + port
-						+ "; not found for url: " + url);
-
-			AuthScope serverAuthScope = getAuthScope(url.getHost(), port,
-					defaultRealm, defaultScheme);
-
-			NTCredentials serverCredentials = new NTCredentials(
-					defaultUsername, defaultPassword, agentHost, defaultRealm);
-
-			client.getState()
-					.setCredentials(serverAuthScope, serverCredentials);
-		}
-	}
-
-	/**
-	 * Returns an authentication scope for the specified <code>host</code>,
-	 * <code>port</code>, <code>realm</code> and <code>scheme</code>.
-	 * 
-	 * @param host
-	 *            Host name or address.
-	 * @param port
-	 *            Port number.
-	 * @param realm
-	 *            Authentication realm.
-	 * @param scheme
-	 *            Authentication scheme.
-	 */
-	private static AuthScope getAuthScope(String host, int port, String realm,
-			String scheme) {
-
-		if (host.length() == 0)
-			host = null;
-
-		if (port < 0)
-			port = -1;
-
-		if (realm.length() == 0)
-			realm = null;
-
-		if (scheme.length() == 0)
-			scheme = null;
-
-		return new AuthScope(host, port, realm, scheme);
-	}
-
-	/**
-	 * Returns an authentication scope for the specified <code>host</code>,
-	 * <code>port</code> and <code>realm</code>.
-	 * 
-	 * @param host
-	 *            Host name or address.
-	 * @param port
-	 *            Port number.
-	 * @param realm
-	 *            Authentication realm.
-	 */
-	private static AuthScope getAuthScope(String host, int port, String realm) {
+  // Since the Configuration has not yet been set,
+  // then an unconfigured client is returned.
+  private static HttpClient client = new HttpClient(connectionManager);
+  private static String defaultUsername;
+  private static String defaultPassword;
+  private static String defaultRealm;
+  private static String defaultScheme;
+  private static String authFile;
+  private static String agentHost;
+  private static boolean authRulesRead = false;
+  private static Configuration conf;
+
+  int maxThreadsTotal = 10;
+
+  private String proxyUsername;
+  private String proxyPassword;
+  private String proxyRealm;
+
+  private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
+
+  static {
+    FIELDS.add(WebPage.Field.MODIFIED_TIME);
+    FIELDS.add(WebPage.Field.HEADERS);
+  }
+
+  @Override
+  public Collection<Field> getFields() {
+    return FIELDS;
+  }
+
+  /**
+   * Returns the configured HTTP client.
+   * 
+   * @return HTTP client
+   */
+  static synchronized HttpClient getClient() {
+    return client;
+  }
+
+  /**
+   * Constructs this plugin.
+   */
+  public Http() {
+    super(LOG);
+  }
+
+  /**
+   * Stores the configuration, reads the thread, proxy, agent host and auth
+   * file settings from it, configures the shared client and loads credentials.
+   * A failure to load the auth file is logged with its cause, not rethrown.
+   * @param conf
+   *          Configuration
+   */
+  public void setConf(Configuration conf) {
+    super.setConf(conf);
+    Http.conf = conf;
+    this.maxThreadsTotal = conf.getInt("fetcher.threads.fetch", 10);
+    this.proxyUsername = conf.get("http.proxy.username", "");
+    this.proxyPassword = conf.get("http.proxy.password", "");
+    this.proxyRealm = conf.get("http.proxy.realm", "");
+    agentHost = conf.get("http.agent.host", "");
+    authFile = conf.get("http.auth.file", "");
+    configureClient();
+    try {
+      setCredentials();
+    } catch (Exception ex) {
+      if (LOG.isErrorEnabled()) {
+        LOG.error("Could not read " + authFile + " : " + ex.getMessage(), ex);
+      }
+    }
+  }
+
+  /**
+   * Main method.
+   * 
+   * @param args
+   *          Command line arguments
+   */
+  public static void main(String[] args) throws Exception {
+    Http http = new Http();
+    http.setConf(NutchConfiguration.create());
+    main(http, args);
+  }
+
+  /**
+   * Fetches the <code>url</code> with a configured HTTP client and gets the
+   * response.
+   * 
+   * @param url
+   *          URL to be fetched
+   * @param page
+   *          Web page record handed on to the {@link HttpResponse}
+   * @param redirect
+   *          Follow redirects if and only if true
+   * @return HTTP response
+   */
+  protected Response getResponse(URL url, WebPage page, boolean redirect)
+      throws ProtocolException, IOException {
+    resolveCredentials(url);
+    return new HttpResponse(this, url, page, redirect);
+  }
+
+  /**
+   * Configures the HTTP client
+   */
+  private void configureClient() {
+
+    // Set up an HTTPS socket factory that accepts self-signed certs.
+    ProtocolSocketFactory factory = new SSLProtocolSocketFactory();
+    Protocol https = new Protocol("https", factory, 443);
+    Protocol.registerProtocol("https", https);
+
+    HttpConnectionManagerParams params = connectionManager.getParams();
+    params.setConnectionTimeout(timeout);
+    params.setSoTimeout(timeout);
+    params.setSendBufferSize(BUFFER_SIZE);
+    params.setReceiveBufferSize(BUFFER_SIZE);
+    params.setMaxTotalConnections(maxThreadsTotal);
+
+    // Also set max connections per host to maxThreadsTotal since all threads
+    // might be used to fetch from the same host - otherwise timeout errors can
+    // occur
+    params.setDefaultMaxConnectionsPerHost(maxThreadsTotal);
+
+    // executeMethod(HttpMethod) seems to ignore the connection timeout on
+    // the connection manager.
+    // set it explicitly on the HttpClient.
+    client.getParams().setConnectionManagerTimeout(timeout);
+
+    HostConfiguration hostConf = client.getHostConfiguration();
+    ArrayList<Header> headers = new ArrayList<Header>();
+    // Set the User Agent in the header
+    headers.add(new Header("User-Agent", userAgent));
+    // prefer English
+    headers.add(new Header("Accept-Language", "en-us,en-gb,en;q=0.7,*;q=0.3"));
+    // prefer UTF-8
+    headers.add(new Header("Accept-Charset", "utf-8,ISO-8859-1;q=0.7,*;q=0.7"));
+    // prefer understandable formats
+    headers
+        .add(new Header(
+            "Accept",
+            "text/html,application/xml;q=0.9,application/xhtml+xml,text/xml;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"));
+    // accept gzipped content
+    headers.add(new Header("Accept-Encoding", "x-gzip, gzip, deflate"));
+    hostConf.getParams().setParameter("http.default-headers", headers);
+
+    // HTTP proxy server details
+    if (useProxy) {
+      hostConf.setProxy(proxyHost, proxyPort);
+
+      if (proxyUsername.length() > 0) {
+
+        AuthScope proxyAuthScope = getAuthScope(this.proxyHost, this.proxyPort,
+            this.proxyRealm);
+
+        NTCredentials proxyCredentials = new NTCredentials(this.proxyUsername,
+            this.proxyPassword, Http.agentHost, this.proxyRealm);
+
+        client.getState().setProxyCredentials(proxyAuthScope, proxyCredentials);
+      }
+    }
+
+  }
+
+  /**
+   * Reads the authentication file named by 'http.auth.file', at most once, and
+   * sets the credentials for the configured authentication scopes in the HTTP
+   * client object. The file stream is closed as soon as parsing has finished.
+   * 
+   * @throws ParserConfigurationException
+   *           If a document builder can not be created.
+   * @throws SAXException
+   *           If any parsing error occurs.
+   * @throws IOException
+   *           If any I/O error occurs.
+   */
+  private static synchronized void setCredentials()
+      throws ParserConfigurationException, SAXException, IOException {
+    if (authRulesRead)
+      return;
+
+    authRulesRead = true; // Avoid re-attempting to read
+
+    InputStream is = conf.getConfResourceAsInputStream(authFile);
+    if (is != null) {
+      Document doc;
+      try {
+        doc = DocumentBuilderFactory.newInstance().newDocumentBuilder()
+            .parse(is);
+      } finally {
+        is.close(); // the DOM is in memory (or parsing failed): release it
+      }
+
+      Element rootElement = doc.getDocumentElement();
+      if (!"auth-configuration".equals(rootElement.getTagName())) {
+        if (LOG.isWarnEnabled())
+          LOG.warn("Bad auth conf file: root element <"
+              + rootElement.getTagName() + "> found in " + authFile
+              + " - must be <auth-configuration>");
+      }
+
+      // For each set of credentials
+      NodeList credList = rootElement.getChildNodes();
+      for (int i = 0; i < credList.getLength(); i++) {
+        Node credNode = credList.item(i);
+        if (!(credNode instanceof Element))
+          continue;
+
+        Element credElement = (Element) credNode;
+        if (!"credentials".equals(credElement.getTagName())) {
+          if (LOG.isWarnEnabled())
+            LOG.warn("Bad auth conf file: Element <" + credElement.getTagName()
+                + "> not recognized in " + authFile
+                + " - expected <credentials>");
+          continue;
+        }
+
+        String username = credElement.getAttribute("username");
+        String password = credElement.getAttribute("password");
+
+        // For each authentication scope
+        NodeList scopeList = credElement.getChildNodes();
+        for (int j = 0; j < scopeList.getLength(); j++) {
+          Node scopeNode = scopeList.item(j);
+          if (!(scopeNode instanceof Element))
+            continue;
+
+          Element scopeElement = (Element) scopeNode;
+
+          if ("default".equals(scopeElement.getTagName())) {
+            // Determine realm and scheme, if any
+            String realm = scopeElement.getAttribute("realm");
+            String scheme = scopeElement.getAttribute("scheme");
+
+            // Set default credentials
+            defaultUsername = username;
+            defaultPassword = password;
+            defaultRealm = realm;
+            defaultScheme = scheme;
+
+            if (LOG.isTraceEnabled()) {
+              LOG.trace("Credentials - username: " + username
+                  + "; set as default" + " for realm: " + realm + "; scheme: "
+                  + scheme);
+            }
+
+          } else if ("authscope".equals(scopeElement.getTagName())) {
+            // Determine authentication scope details
+            String host = scopeElement.getAttribute("host");
+            int port = -1; // For setting port to AuthScope.ANY_PORT
+            try {
+              port = Integer.parseInt(scopeElement.getAttribute("port"));
+            } catch (NumberFormatException ex) {
+              // do nothing, port is already set to any port
+            }
+            String realm = scopeElement.getAttribute("realm");
+            String scheme = scopeElement.getAttribute("scheme");
+
+            // Set credentials for the determined scope
+            AuthScope authScope = getAuthScope(host, port, realm, scheme);
+            NTCredentials credentials = new NTCredentials(username, password,
+                agentHost, realm);
+
+            client.getState().setCredentials(authScope, credentials);
+
+            if (LOG.isTraceEnabled()) {
+              LOG.trace("Credentials - username: " + username
+                  + "; set for AuthScope - " + "host: " + host + "; port: "
+                  + port + "; realm: " + realm + "; scheme: " + scheme);
+            }
+          } else {
+            if (LOG.isWarnEnabled())
+              LOG.warn("Bad auth conf file: Element <"
+                  + scopeElement.getTagName() + "> not recognized in "
+                  + authFile + " - expected <authscope>");
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * If credentials for the authentication scope determined from the specified
+   * <code>url</code> is not already set in the HTTP client, then this method
+   * sets the default credentials to fetch the specified <code>url</code>. If
+   * credentials are found for the authentication scope, the method returns
+   * without altering the client.
+   * 
+   * @param url
+   *          URL to be fetched
+   */
+  private void resolveCredentials(URL url) {
+
+    if (defaultUsername != null && defaultUsername.length() > 0) {
+
+      int port = url.getPort();
+      if (port == -1) {
+        if ("https".equals(url.getProtocol()))
+          port = 443;
+        else
+          port = 80;
+      }
+
+      AuthScope scope = new AuthScope(url.getHost(), port);
+
+      if (client.getState().getCredentials(scope) != null) {
+        if (LOG.isTraceEnabled())
+          LOG.trace("Pre-configured credentials with scope - host: "
+              + url.getHost() + "; port: " + port + "; found for url: " + url);
+
+        // Credentials are already configured, so do nothing and return
+        return;
+      }
+
+      if (LOG.isTraceEnabled())
+        LOG.trace("Pre-configured credentials with scope -  host: "
+            + url.getHost() + "; port: " + port + "; not found for url: " + url);
+
+      AuthScope serverAuthScope = getAuthScope(url.getHost(), port,
+          defaultRealm, defaultScheme);
+
+      NTCredentials serverCredentials = new NTCredentials(defaultUsername,
+          defaultPassword, agentHost, defaultRealm);
+
+      client.getState().setCredentials(serverAuthScope, serverCredentials);
+    }
+  }
+
+  /**
+   * Returns an authentication scope for the specified <code>host</code>,
+   * <code>port</code>, <code>realm</code> and <code>scheme</code>. Empty
+   * strings are passed on as null and negative ports as -1; null is accepted.
+   * @param host
+   *          Host name or address.
+   * @param port
+   *          Port number.
+   * @param realm
+   *          Authentication realm.
+   * @param scheme
+   *          Authentication scheme.
+   */
+  private static AuthScope getAuthScope(String host, int port, String realm,
+      String scheme) {
+
+    if (host != null && host.length() == 0)
+      host = null;
+
+    if (port < 0)
+      port = -1;
+
+    if (realm != null && realm.length() == 0)
+      realm = null;
+
+    if (scheme != null && scheme.length() == 0)
+      scheme = null;
+
+    return new AuthScope(host, port, realm, scheme);
+  }
+
+  /**
+   * Returns an authentication scope for the specified <code>host</code>,
+   * <code>port</code> and <code>realm</code>.
+   * 
+   * @param host
+   *          Host name or address.
+   * @param port
+   *          Port number.
+   * @param realm
+   *          Authentication realm.
+   */
+  private static AuthScope getAuthScope(String host, int port, String realm) {
 
-		return getAuthScope(host, port, realm, "");
-	}
+    return getAuthScope(host, port, realm, "");
+  }
 
 }

Modified: nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthentication.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthentication.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthentication.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthentication.java Fri Jan  9 06:34:33 2015
@@ -15,32 +15,31 @@
  * limitations under the License.
  */
 package org.apache.nutch.protocol.httpclient;
- 
+
 import java.util.List;
 
 /**
- *  The base level of services required for Http Authentication
- *
+ * The base level of services required for Http Authentication
+ * 
  * @see HttpAuthenticationFactory
  * 
- * @author    Matt Tencati
+ * @author Matt Tencati
  */
 public interface HttpAuthentication {
 
-    /**
-     *  Gets the credentials generated by the HttpAuthentication
-     *  object.  May return null.
-     *
-     * @return    The credentials value
-     */
-    public List getCredentials();
+  /**
+   * Gets the credentials generated by the HttpAuthentication object. May return
+   * null.
+   * 
+   * @return The credentials value
+   */
+  public List getCredentials();
 
-    /**
-     *  Gets the realm used by the HttpAuthentication object during creation.
-     *
-     *  @return    The realm value
-     */
-    public String getRealm();
+  /**
+   * Gets the realm used by the HttpAuthentication object during creation.
+   * 
+   * @return The realm value
+   */
+  public String getRealm();
 
 }
-

Modified: nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationException.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationException.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationException.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationException.java Fri Jan  9 06:34:33 2015
@@ -26,40 +26,46 @@ public class HttpAuthenticationException
 
   private static final long serialVersionUID = 1L;
 
-    /**
-     *  Constructs a new exception with null as its detail message.
-     */
-    public HttpAuthenticationException() {
-        super();
-    }
-
-    /**
-     * Constructs a new exception with the specified detail message.
-     * 
-     * @param message the detail message. The detail message is saved for later retrieval by the {@link Throwable#getMessage()} method.
-     */
-    public HttpAuthenticationException(String message) {
-        super(message);
-    }
-
-    /**
-     * Constructs a new exception with the specified message and cause.
-     *
-     * @param message the detail message. The detail message is saved for later retrieval by the {@link Throwable#getMessage()} method.
-     * @param cause the cause (use {@link #getCause()} to retrieve the cause)
-     */
-    public HttpAuthenticationException(String message, Throwable cause) {
-        super(message, cause);
-    }
-
-    /**
-     * Constructs a new exception with the specified cause and detail message from
-     * given clause if it is not null.
-     * 
-     * @param cause the cause (use {@link #getCause()} to retrieve the cause)
-     */
-    public HttpAuthenticationException(Throwable cause) {
-        super(cause);
-    }
+  /**
+   * Constructs a new exception with null as its detail message.
+   */
+  public HttpAuthenticationException() {
+    super();
+  }
+
+  /**
+   * Constructs a new exception with the specified detail message.
+   * 
+   * @param message
+   *          the detail message. The detail message is saved for later
+   *          retrieval by the {@link Throwable#getMessage()} method.
+   */
+  public HttpAuthenticationException(String message) {
+    super(message);
+  }
+
+  /**
+   * Constructs a new exception with the specified message and cause.
+   * 
+   * @param message
+   *          the detail message. The detail message is saved for later
+   *          retrieval by the {@link Throwable#getMessage()} method.
+   * @param cause
+   *          the cause (use {@link #getCause()} to retrieve the cause)
+   */
+  public HttpAuthenticationException(String message, Throwable cause) {
+    super(message, cause);
+  }
+
+  /**
+   * Constructs a new exception with the specified cause; the detail message is
+   * taken from the given cause if it is not null.
+   * 
+   * @param cause
+   *          the cause (use {@link #getCause()} to retrieve the cause)
+   */
+  public HttpAuthenticationException(Throwable cause) {
+    super(cause);
+  }
 
 }

Modified: nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java Fri Jan  9 06:34:33 2015
@@ -34,12 +34,10 @@ import org.apache.hadoop.conf.Configurab
 // Nutch imports
 import org.apache.nutch.metadata.Metadata;
 
-
 /**
- * Provides the Http protocol implementation
- * with the ability to authenticate when prompted.  The goal is to provide 
- * multiple authentication types but for now just the {@link HttpBasicAuthentication} authentication 
- * type is provided.
+ * Provides the Http protocol implementation with the ability to authenticate
+ * when prompted. The goal is to provide multiple authentication types but for
+ * now just the {@link HttpBasicAuthentication} authentication type is provided.
  * 
  * @see HttpBasicAuthentication
  * @see Http
@@ -49,94 +47,96 @@ import org.apache.nutch.metadata.Metadat
  */
 public class HttpAuthenticationFactory implements Configurable {
 
-    /** 
-     * The HTTP Authentication (WWW-Authenticate) header which is returned 
-     * by a webserver requiring authentication.
-     */
-    public static final String WWW_AUTHENTICATE = "WWW-Authenticate";
-	
-    public static final Logger LOG = LoggerFactory.getLogger(HttpAuthenticationFactory.class);
-
-    private static Map<?, ?> auths = new TreeMap<Object, Object>(); 
-
-    private Configuration conf = null;
-    
-    
-    public HttpAuthenticationFactory(Configuration conf) {
-      setConf(conf);
-    }
-
-   
-    /* ---------------------------------- *
-     * <implementation:Configurable> *
-     * ---------------------------------- */
-
-    public void setConf(Configuration conf) {
-      this.conf = conf;
-      //if (conf.getBoolean("http.auth.verbose", false)) {
-      //  LOG.setLevel(Level.FINE);
-      //} else {
-      //  LOG.setLevel(Level.WARNING);
-      //}
-    }
-
-    public Configuration getConf() {
-      return conf;
-    }
- 
-    /* ---------------------------------- *
-     * <implementation:Configurable> *
-     * ---------------------------------- */
-
-
-    @SuppressWarnings("unchecked")
-    public HttpAuthentication findAuthentication(Metadata header) {
-
-        if (header == null) return null;
-        
-    	try {
-			Collection challenge = null;
-			if (header instanceof Metadata) {
-				Object o = header.get(WWW_AUTHENTICATE);
-				if (o instanceof Collection) {
-					challenge = (Collection<?>) o;
-				} else {
-					challenge = new ArrayList<String>();
-					challenge.add(o.toString());
-				}
-			} else {
-				String challengeString = header.get(WWW_AUTHENTICATE); 
-				if (challengeString != null) {
-					challenge = new ArrayList<Object>();
-					challenge.add(challengeString);
-				}
-			}
-			if (challenge == null) {
-                                if (LOG.isTraceEnabled()) {
-				  LOG.trace("Authentication challenge is null");
-                                }
-				return null;
-			}
-			
-			Iterator<?> i = challenge.iterator();
-			HttpAuthentication auth = null;
-			while (i.hasNext() && auth == null) {
-				String challengeString = (String)i.next();
-				if (challengeString.equals("NTLM")) {
-				   challengeString="Basic realm=techweb";
-		                  }
-		               
-                                if (LOG.isTraceEnabled()) {  
-		                  LOG.trace("Checking challengeString=" + challengeString);
-                                }
-				auth = HttpBasicAuthentication.getAuthentication(challengeString, conf);
-				if (auth != null) return auth;
-				
-				//TODO Add additional Authentication lookups here
-			}
-		} catch (Exception e) {
-			LOG.error("Failed with following exception: ", e);
-		}
+  /**
+   * The HTTP Authentication (WWW-Authenticate) header which is returned by a
+   * webserver requiring authentication.
+   */
+  public static final String WWW_AUTHENTICATE = "WWW-Authenticate";
+
+  public static final Logger LOG = LoggerFactory
+      .getLogger(HttpAuthenticationFactory.class);
+
+  private static Map<?, ?> auths = new TreeMap<Object, Object>();
+
+  private Configuration conf = null;
+
+  public HttpAuthenticationFactory(Configuration conf) {
+    setConf(conf);
+  }
+
+  /*
+   * ---------------------------------- * <implementation:Configurable> *
+   * ----------------------------------
+   */
+
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+    // if (conf.getBoolean("http.auth.verbose", false)) {
+    // LOG.setLevel(Level.FINE);
+    // } else {
+    // LOG.setLevel(Level.WARNING);
+    // }
+  }
+
+  public Configuration getConf() {
+    return conf;
+  }
+
+  /*
+   * ---------------------------------- * </implementation:Configurable> *
+   * ----------------------------------
+   */
+
+  @SuppressWarnings("unchecked")
+  public HttpAuthentication findAuthentication(Metadata header) {
+
+    if (header == null)
+      return null;
+
+    try {
+      Collection challenge = null;
+      if (header instanceof Metadata) {
+        Object o = header.get(WWW_AUTHENTICATE);
+        if (o instanceof Collection) {
+          challenge = (Collection<?>) o;
+        } else {
+          challenge = new ArrayList<String>();
+          challenge.add(o.toString());
+        }
+      } else {
+        String challengeString = header.get(WWW_AUTHENTICATE);
+        if (challengeString != null) {
+          challenge = new ArrayList<Object>();
+          challenge.add(challengeString);
+        }
+      }
+      if (challenge == null) {
+        if (LOG.isTraceEnabled()) {
+          LOG.trace("Authentication challenge is null");
+        }
         return null;
+      }
+
+      Iterator<?> i = challenge.iterator();
+      HttpAuthentication auth = null;
+      while (i.hasNext() && auth == null) {
+        String challengeString = (String) i.next();
+        if (challengeString.equals("NTLM")) {
+          challengeString = "Basic realm=techweb";
+        }
+
+        if (LOG.isTraceEnabled()) {
+          LOG.trace("Checking challengeString=" + challengeString);
+        }
+        auth = HttpBasicAuthentication.getAuthentication(challengeString, conf);
+        if (auth != null)
+          return auth;
+
+        // TODO Add additional Authentication lookups here
+      }
+    } catch (Exception e) {
+      LOG.error("Failed with following exception: ", e);
     }
+    return null;
+  }
 }

Modified: nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java Fri Jan  9 06:34:33 2015
@@ -35,154 +35,163 @@ import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configurable;
 
-
 /**
- * Implementation of RFC 2617 Basic Authentication.  Usernames and passwords are stored 
- * in standard Nutch configuration files using the following properties:
- *      http.auth.basic.<realm>.user
- *      http.auth.basic.<realm>.pass
+ * Implementation of RFC 2617 Basic Authentication. Usernames and passwords are
+ * stored in standard Nutch configuration files using the following properties:
+ * http.auth.basic.<realm>.user and http.auth.basic.<realm>.password
  */
-public class HttpBasicAuthentication implements HttpAuthentication, Configurable {
+public class HttpBasicAuthentication implements HttpAuthentication,
+    Configurable {
 
-    public static final Logger LOG = LoggerFactory.getLogger(HttpBasicAuthentication.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(HttpBasicAuthentication.class);
 
-    private static Pattern basic = Pattern.compile("[bB][aA][sS][iI][cC] [rR][eE][aA][lL][mM]=\"(\\w*)\"");
-	
-    private static Map<String, HttpBasicAuthentication> authMap = new TreeMap<String, HttpBasicAuthentication>();
-   
-    private Configuration conf = null; 
-    private String challenge = null;
-    private ArrayList<String> credentials = null;
-    private String realm = null;
-
-
-    /**
-     *  Construct an HttpBasicAuthentication for the given challenge
-     *  parameters. The challenge parameters are returned by the web
-     *  server using a WWW-Authenticate header. This will typically be
-     *  represented by single line of the form <code>WWW-Authenticate: Basic realm="myrealm"</code>
-     *
-     * @param  challenge  WWW-Authenticate header from web server
-     */
-    protected HttpBasicAuthentication(String challenge, Configuration conf) throws HttpAuthenticationException {
-        
-        setConf(conf);
-        this.challenge = challenge;
-        credentials = new ArrayList<String>();
-        
-        String username = this.conf.get("http.auth.basic." + challenge + ".user");
-        String password = this.conf.get("http.auth.basic." + challenge + ".password");
-        
-        if (LOG.isTraceEnabled()) {
-          LOG.trace("BasicAuthentication challenge is " + challenge);
-          LOG.trace("BasicAuthentication username=" + username);
-          LOG.trace("BasicAuthentication password=" + password);
-        }
- 
-        if (username == null) {
-        	throw new HttpAuthenticationException("Username for " + challenge + " is null");
-        }
+  private static Pattern basic = Pattern
+      .compile("[bB][aA][sS][iI][cC] [rR][eE][aA][lL][mM]=\"(\\w*)\"");
 
-        if (password == null) {
-        	throw new HttpAuthenticationException("Password for " + challenge + " is null");
+  private static Map<String, HttpBasicAuthentication> authMap = new TreeMap<String, HttpBasicAuthentication>();
+
+  private Configuration conf = null;
+  private String challenge = null;
+  private ArrayList<String> credentials = null;
+  private String realm = null;
+
+  /**
+   * Construct an HttpBasicAuthentication for the given challenge parameters.
+   * The challenge parameters are returned by the web server using a
+   * WWW-Authenticate header. This will typically be represented by single line
+   * of the form <code>WWW-Authenticate: Basic realm="myrealm"</code>
+   * 
+   * @param challenge
+   *          WWW-Authenticate header from web server
+   */
+  protected HttpBasicAuthentication(String challenge, Configuration conf)
+      throws HttpAuthenticationException {
+
+    setConf(conf);
+    this.challenge = challenge;
+    credentials = new ArrayList<String>();
+
+    String username = this.conf.get("http.auth.basic." + challenge + ".user");
+    String password = this.conf.get("http.auth.basic." + challenge
+        + ".password");
+
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("BasicAuthentication challenge is " + challenge);
+      LOG.trace("BasicAuthentication username=" + username);
+      LOG.trace("BasicAuthentication password=" + password);
+    }
+
+    if (username == null) {
+      throw new HttpAuthenticationException("Username for " + challenge
+          + " is null");
+    }
+
+    if (password == null) {
+      throw new HttpAuthenticationException("Password for " + challenge
+          + " is null");
+    }
+
+    byte[] credBytes = (username + ":" + password).getBytes();
+    credentials.add("Authorization: Basic "
+        + new String(Base64.encodeBase64(credBytes)));
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("Basic credentials: " + credentials);
+    }
+  }
+
+  /*
+   * ---------------------------------- * <implementation:Configurable> *
+   * ----------------------------------
+   */
+
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+    // if (conf.getBoolean("http.auth.verbose", false)) {
+    // LOG.setLevel(Level.FINE);
+    // } else {
+    // LOG.setLevel(Level.WARNING);
+    // }
+  }
+
+  public Configuration getConf() {
+    return this.conf;
+  }
+
+  /*
+   * ---------------------------------- * </implementation:Configurable> *
+   * ----------------------------------
+   */
+
+  /**
+   * Gets the Basic credentials generated by this HttpBasicAuthentication object
+   * 
+   * @return Credentials in the form of
+   *         <code>Authorization: Basic &lt;Base64 encoded userid:password&gt;</code>
+   * 
+   */
+  public List<String> getCredentials() {
+    return credentials;
+  }
+
+  /**
+   * Gets the realm attribute of the HttpBasicAuthentication object. This should
+   * have been supplied to the {@link #getAuthentication(String, Configuration)}
+   * static method
+   * 
+   * @return The realm
+   */
+  public String getRealm() {
+    return realm;
+  }
+
+  /**
+   * This method is responsible for providing Basic authentication information.
+   * The method caches authentication information for each realm so that the
+   * required authentication information does not need to be regenerated for
+   * every request.
+   * 
+   * @param challenge
+   *          The challenge string provided by the webserver. This is the text
+   *          which follows the WWW-Authenticate header, including the Basic
+   *          tag.
+   * @return An HttpBasicAuthentication object or null if unable to generate
+   *         appropriate credentials.
+   */
+  public static HttpBasicAuthentication getAuthentication(String challenge,
+      Configuration conf) {
+    if (challenge == null)
+      return null;
+    Matcher basicMatcher = basic.matcher(challenge);
+    if (basicMatcher.matches()) {
+      String realm = basicMatcher.group(1);
+      Object auth = authMap.get(realm);
+      if (auth == null) {
+        HttpBasicAuthentication newAuth = null;
+        try {
+          newAuth = new HttpBasicAuthentication(realm, conf);
+        } catch (HttpAuthenticationException hae) {
+          if (LOG.isTraceEnabled()) {
+            LOG.trace("HttpBasicAuthentication failed for " + challenge);
+          }
         }
-        
-        byte[] credBytes = (username + ":" + password).getBytes();
-        credentials.add("Authorization: Basic " + new String(Base64.encodeBase64(credBytes)));
-        if (LOG.isTraceEnabled()) {
-          LOG.trace("Basic credentials: " + credentials);
-        }
-    }
-
-
-    /* ---------------------------------- *
-     * <implementation:Configurable> *
-     * ---------------------------------- */
-
-    public void setConf(Configuration conf) {
-      this.conf = conf;
-      //if (conf.getBoolean("http.auth.verbose", false)) {
-      //  LOG.setLevel(Level.FINE);
-      //} else {
-      //  LOG.setLevel(Level.WARNING);
-      //}
-    }
-
-    public Configuration getConf() {
-      return this.conf;
-    }
-
-    /* ---------------------------------- *
-     * <implementation:Configurable> *
-     * ---------------------------------- */
-
-
-    /**
-     *  Gets the Basic credentials generated by this
-     *  HttpBasicAuthentication object
-     *
-     * @return    Credentials in the form of <code>Authorization: Basic &lt;Base64 encoded userid:password&gt;
-     *
-     */
-    public List<String> getCredentials() {
-        return credentials;
-    }
-
-
-   /**
-    * Gets the realm attribute of the HttpBasicAuthentication object.
-    * This should have been supplied to the {@link #getAuthentication(String, Configuration)}
-    * static method
-    *
-    * @return    The realm
-    */
-    public String getRealm() {
-        return realm;
-    }
-
-    /**
-     * This method is responsible for providing Basic authentication information.  The
-     * method caches authentication information for each realm so that the required
-     * authentication information does not need to be regenerated for every request.
-     *  
-     * @param challenge The challenge string provided by the webserver.  This is the
-     * text which follows the WWW-Authenticate header, including the Basic tag.
-     * @return An HttpBasicAuthentication object or null 
-     * if unable to generate appropriate credentials.
-     */
-    public static HttpBasicAuthentication getAuthentication(String challenge, Configuration conf) {
-        if (challenge == null) return null;
-        Matcher basicMatcher = basic.matcher(challenge);
-        if (basicMatcher.matches()) {
-        	String realm = basicMatcher.group(1);
-	        Object auth = authMap.get(realm);
-	        if (auth == null) {
-	            HttpBasicAuthentication newAuth = null;
-	            try {
-	            	newAuth = new HttpBasicAuthentication(realm, conf);
-	            } catch (HttpAuthenticationException hae) { 
-                        if (LOG.isTraceEnabled()) {
-	            	  LOG.trace("HttpBasicAuthentication failed for " + challenge);
-                        }
-	            }
-	            authMap.put(realm, newAuth);
-	            return newAuth;
-	        } else {
-	            return (HttpBasicAuthentication) auth;
-	        }
-        }
-        return null;
-    }
-    
-	/**
-	 * Provides a pattern which can be used by an outside resource to determine if 
-	 * this class can provide credentials based on simple header information.  It does
-	 * not calculate any information regarding realms or challenges.
-	 * 
-	 * @return Returns a Pattern which will match a Basic WWW-Authenticate header.
-	 */
-	public static final Pattern getBasicPattern() {
-		return basic;
-	}
+        authMap.put(realm, newAuth);
+        return newAuth;
+      } else {
+        return (HttpBasicAuthentication) auth;
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Provides a pattern which can be used by an outside resource to determine if
+   * this class can provide credentials based on simple header information. It
+   * does not calculate any information regarding realms or challenges.
+   * 
+   * @return Returns a Pattern which will match a Basic WWW-Authenticate header.
+   */
+  public static final Pattern getBasicPattern() {
+    return basic;
+  }
 }
-

Modified: nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java Fri Jan  9 06:34:33 2015
@@ -41,7 +41,7 @@ import org.apache.nutch.storage.WebPage;
 
 /**
  * An HTTP response.
- *
+ * 
  * @author Susam Pal
  */
 public class HttpResponse implements Response {
@@ -53,18 +53,22 @@ public class HttpResponse implements Res
 
   /**
    * Fetches the given <code>url</code> and prepares HTTP response.
-   *
-   * @param http                An instance of the implementation class
-   *                            of this plugin
-   * @param url                 URL to be fetched
-   * @param page                WebPage
-   * @param followRedirects     Whether to follow redirects; follows
-   *                            redirect if and only if this is true
-   * @return                    HTTP response
-   * @throws IOException        When an error occurs
+   * 
+   * @param http
+   *          An instance of the implementation class of this plugin
+   * @param url
+   *          URL to be fetched
+   * @param page
+   *          WebPage
+   * @param followRedirects
+   *          Whether to follow redirects; follows redirect if and only if this
+   *          is true
+   * @return HTTP response
+   * @throws IOException
+   *           When an error occurs
    */
-  HttpResponse(Http http, URL url, WebPage page,
-      boolean followRedirects) throws IOException {
+  HttpResponse(Http http, URL url, WebPage page, boolean followRedirects)
+      throws IOException {
 
     // Prepare GET method for HTTP request
     this.url = url;
@@ -99,7 +103,7 @@ public class HttpResponse implements Res
       for (int i = 0; i < heads.length; i++) {
         headers.set(heads[i].getName(), heads[i].getValue());
       }
-      
+
       // Limit download size
       int contentLength = Integer.MAX_VALUE;
       String contentLengthString = headers.get(Response.CONTENT_LENGTH);
@@ -107,12 +111,10 @@ public class HttpResponse implements Res
         try {
           contentLength = Integer.parseInt(contentLengthString.trim());
         } catch (NumberFormatException ex) {
-          throw new HttpException("bad content length: " +
-              contentLengthString);
+          throw new HttpException("bad content length: " + contentLengthString);
         }
       }
-      if (http.getMaxContent() >= 0 &&
-          contentLength > http.getMaxContent()) {
+      if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) {
         contentLength = http.getMaxContent();
       }
 
@@ -132,7 +134,8 @@ public class HttpResponse implements Res
 
         content = out.toByteArray();
       } catch (Exception e) {
-        if (code == 200) throw new IOException(e.toString());
+        if (code == 200)
+          throw new IOException(e.toString());
         // for codes other than 200 OK, we are fine with empty content
       } finally {
         if (in != null) {
@@ -140,16 +143,15 @@ public class HttpResponse implements Res
         }
         get.abort();
       }
-      
+
       StringBuilder fetchTrace = null;
       if (Http.LOG.isTraceEnabled()) {
         // Trace message
-        fetchTrace = new StringBuilder("url: " + url +
-            "; status code: " + code +
-            "; bytes received: " + content.length);
+        fetchTrace = new StringBuilder("url: " + url + "; status code: " + code
+            + "; bytes received: " + content.length);
         if (getHeader(Response.CONTENT_LENGTH) != null)
-          fetchTrace.append("; Content-Length: " +
-              getHeader(Response.CONTENT_LENGTH));
+          fetchTrace.append("; Content-Length: "
+              + getHeader(Response.CONTENT_LENGTH));
         if (getHeader(Response.LOCATION) != null)
           fetchTrace.append("; Location: " + getHeader(Response.LOCATION));
       }
@@ -159,8 +161,7 @@ public class HttpResponse implements Res
         String contentEncoding = headers.get(Response.CONTENT_ENCODING);
         if (contentEncoding != null && Http.LOG.isTraceEnabled())
           fetchTrace.append("; Content-Encoding: " + contentEncoding);
-        if ("gzip".equals(contentEncoding) ||
-            "x-gzip".equals(contentEncoding)) {
+        if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
           content = http.processGzipEncoded(content, url);
           if (Http.LOG.isTraceEnabled())
             fetchTrace.append("; extracted to " + content.length + " bytes");
@@ -170,14 +171,14 @@ public class HttpResponse implements Res
             fetchTrace.append("; extracted to " + content.length + " bytes");
         }
       }
-      
+
       // add headers in metadata to row
-	  if (page.getHeaders() != null) {
-	    page.getHeaders().clear();
-	  }
-	  for (String key : headers.names()) {
-	    page.getHeaders().put(new Utf8(key), new Utf8(headers.get(key)));
-	  }
+      if (page.getHeaders() != null) {
+        page.getHeaders().clear();
+      }
+      for (String key : headers.names()) {
+        page.getHeaders().put(new Utf8(key), new Utf8(headers.get(key)));
+      }
 
       // Logger trace message
       if (Http.LOG.isTraceEnabled()) {
@@ -188,15 +189,15 @@ public class HttpResponse implements Res
     }
   }
 
-  
-  /* ------------------------- *
-   * <implementation:Response> *
-   * ------------------------- */
-  
+  /*
+   * ------------------------- * <implementation:Response> *
+   * -------------------------
+   */
+
   public URL getUrl() {
     return url;
   }
-  
+
   public int getCode() {
     return code;
   }
@@ -204,7 +205,7 @@ public class HttpResponse implements Res
   public String getHeader(String name) {
     return headers.get(name);
   }
-  
+
   public Metadata getHeaders() {
     return headers;
   }
@@ -213,8 +214,8 @@ public class HttpResponse implements Res
     return content;
   }
 
-  /* -------------------------- *
-   * </implementation:Response> *
-   * -------------------------- */
+  /*
+   * -------------------------- * </implementation:Response> *
+   * --------------------------
+   */
 }
-