You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by jn...@apache.org on 2014/05/16 15:40:21 UTC

svn commit: r1595196 - in /nutch/branches/2.x: CHANGES.txt src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java src/plugin/protocol-http/plugin.xml src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java

Author: jnioche
Date: Fri May 16 13:40:21 2014
New Revision: 1595196

URL: http://svn.apache.org/r1595196
Log:
NUTCH-1676 Add rudimentary SSL support to protocol-http

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
    nutch/branches/2.x/src/plugin/protocol-http/plugin.xml
    nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1595196&r1=1595195&r2=1595196&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Fri May 16 13:40:21 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development
 
+* NUTCH-1676 Add rudimentary SSL support to protocol-http (jnioche, markus)
+
 * NUTCH-1674 Use batchId filter to enable scan (GORA-119) for Fetch,Parse,Update,Index (Tien Nguyen Manh and Alparslan Avcı via jnioche)
 
 * NUTCH-1714 Upgrade to Gora 0.4 (Alparslan Avcı via jnioche)

Modified: nutch/branches/2.x/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java?rev=1595196&r1=1595195&r2=1595196&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java (original)
+++ nutch/branches/2.x/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java Fri May 16 13:40:21 2014
@@ -20,6 +20,9 @@ package org.apache.nutch.protocol.http.a
 import java.io.IOException;
 import java.net.URL;
 import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.avro.util.Utf8;
@@ -95,6 +98,12 @@ public abstract class HttpBase implement
   /** Response Time */
   protected boolean responseTime = true;
   
+  /** Which TLS/SSL protocols to support */
+  protected Set<String> tlsPreferredProtocols;
+  
+  /** Which TLS/SSL cipher suites to support */
+  protected Set<String> tlsPreferredCipherSuites;
+  
   /** Creates a new instance of HttpBase */
   public HttpBase() {
     this(null);
@@ -124,6 +133,32 @@ public abstract class HttpBase implement
     this.useHttp11 = conf.getBoolean("http.useHttp11", false);
     this.responseTime = conf.getBoolean("http.store.responsetime", true);
     this.robots.setConf(conf);
+    
+    String[] protocols = conf.getStrings("http.tls.supported.protocols", "TLSv1.2", "TLSv1.1", "TLSv1", "SSLv3");
+    String[] ciphers = conf.getStrings("http.tls.supported.cipher.suites", 
+        "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384","TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384",
+        "TLS_RSA_WITH_AES_256_CBC_SHA256","TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384","TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384",
+        "TLS_DHE_RSA_WITH_AES_256_CBC_SHA256","TLS_DHE_DSS_WITH_AES_256_CBC_SHA256","TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA",
+        "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA","TLS_RSA_WITH_AES_256_CBC_SHA","TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA",
+        "TLS_ECDH_RSA_WITH_AES_256_CBC_SHA","TLS_DHE_RSA_WITH_AES_256_CBC_SHA","TLS_DHE_DSS_WITH_AES_256_CBC_SHA",
+        "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256","TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256","TLS_RSA_WITH_AES_128_CBC_SHA256",
+        "TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256","TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256","TLS_DHE_RSA_WITH_AES_128_CBC_SHA256",
+        "TLS_DHE_DSS_WITH_AES_128_CBC_SHA256","TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA","TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA",
+        "TLS_RSA_WITH_AES_128_CBC_SHA","TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA","TLS_ECDH_RSA_WITH_AES_128_CBC_SHA",
+        "TLS_DHE_RSA_WITH_AES_128_CBC_SHA","TLS_DHE_DSS_WITH_AES_128_CBC_SHA","TLS_ECDHE_ECDSA_WITH_RC4_128_SHA",
+        "TLS_ECDHE_RSA_WITH_RC4_128_SHA","SSL_RSA_WITH_RC4_128_SHA","TLS_ECDH_ECDSA_WITH_RC4_128_SHA",
+        "TLS_ECDH_RSA_WITH_RC4_128_SHA","TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA","TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA",
+        "SSL_RSA_WITH_3DES_EDE_CBC_SHA","TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA","TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA",
+        "SSL_DHE_RSA_WITH_3DES_EDE_CBC_SHA","SSL_DHE_DSS_WITH_3DES_EDE_CBC_SHA","SSL_RSA_WITH_RC4_128_MD5",
+        "TLS_EMPTY_RENEGOTIATION_INFO_SCSV","TLS_RSA_WITH_NULL_SHA256","TLS_ECDHE_ECDSA_WITH_NULL_SHA",
+        "TLS_ECDHE_RSA_WITH_NULL_SHA","SSL_RSA_WITH_NULL_SHA","TLS_ECDH_ECDSA_WITH_NULL_SHA","TLS_ECDH_RSA_WITH_NULL_SHA",
+        "SSL_RSA_WITH_NULL_MD5","SSL_RSA_WITH_DES_CBC_SHA","SSL_DHE_RSA_WITH_DES_CBC_SHA","SSL_DHE_DSS_WITH_DES_CBC_SHA",
+        "TLS_KRB5_WITH_RC4_128_SHA","TLS_KRB5_WITH_RC4_128_MD5","TLS_KRB5_WITH_3DES_EDE_CBC_SHA","TLS_KRB5_WITH_3DES_EDE_CBC_MD5",
+        "TLS_KRB5_WITH_DES_CBC_SHA","TLS_KRB5_WITH_DES_CBC_MD5");
+    
+    tlsPreferredProtocols = new HashSet<String>(Arrays.asList(protocols));
+    tlsPreferredCipherSuites = new HashSet<String>(Arrays.asList(ciphers));
+
     logConf();
   }
 
@@ -249,6 +284,14 @@ public abstract class HttpBase implement
   public boolean getUseHttp11() {
     return useHttp11;
   }
+  
+  public Set<String> getTlsPreferredCipherSuites() {
+    return tlsPreferredCipherSuites;
+  }
+  
+  public Set<String> getTlsPreferredProtocols() {
+    return tlsPreferredProtocols;
+  }
 
   private static String getAgentString(String agentName,
       String agentVersion,

Modified: nutch/branches/2.x/src/plugin/protocol-http/plugin.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/plugin.xml?rev=1595196&r1=1595195&r2=1595196&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/plugin.xml (original)
+++ nutch/branches/2.x/src/plugin/protocol-http/plugin.xml Fri May 16 13:40:21 2014
@@ -40,6 +40,10 @@
                       class="org.apache.nutch.protocol.http.Http">
         <parameter name="protocolName" value="http"/>
       </implementation>
+      <implementation id="org.apache.nutch.protocol.http.Http"
+                      class="org.apache.nutch.protocol.http.Http">
+           <parameter name="protocolName" value="https"/>
+      </implementation>
 
    </extension>
 

Modified: nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java?rev=1595196&r1=1595195&r2=1595196&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java Fri May 16 13:40:21 2014
@@ -32,8 +32,13 @@ import org.apache.nutch.storage.WebPage;
 import java.io.*;
 import java.net.InetSocketAddress;
 import java.net.Socket;
+import javax.net.ssl.SSLSocket;
+import javax.net.ssl.SSLSocketFactory;
 import java.net.URL;
 import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
 
 /** An HTTP response. */
 public class HttpResponse implements Response {
@@ -45,6 +50,10 @@ public class HttpResponse implements Res
   private int code;
   private final Metadata headers = new SpellCheckedMetadata();
 
+  protected enum Scheme {
+    HTTP,
+    HTTPS,
+  }
 
   public HttpResponse(HttpBase http, URL url, WebPage page)
   throws ProtocolException, IOException {
@@ -52,8 +61,15 @@ public class HttpResponse implements Res
     this.http = http;
     this.url = url;
 
-    if (!"http".equals(url.getProtocol()))
-      throw new HttpException("Not an HTTP url:" + url);
+    Scheme scheme = null;
+ 
+    if ("http".equals(url.getProtocol())) {
+      scheme = Scheme.HTTP;
+    } else if ("https".equals(url.getProtocol())) {
+      scheme = Scheme.HTTPS;
+    } else {
+      throw new HttpException("Unknown scheme (not http/https) for url:" + url);
+    }
 
     if (Http.LOG.isTraceEnabled()) {
       Http.LOG.trace("fetching " + url);
@@ -69,7 +85,11 @@ public class HttpResponse implements Res
     int port;
     String portString;
     if (url.getPort() == -1) {
-      port= 80;
+      if (scheme == Scheme.HTTP) {
+        port = 80;
+      } else {
+        port = 443;
+      }
       portString= "";
     } else {
       port= url.getPort();
@@ -88,6 +108,26 @@ public class HttpResponse implements Res
       InetSocketAddress sockAddr= new InetSocketAddress(sockHost, sockPort);
       socket.connect(sockAddr, http.getTimeout());
       
+      if (scheme == Scheme.HTTPS) {
+        SSLSocketFactory factory = (SSLSocketFactory)SSLSocketFactory.getDefault();
+        SSLSocket sslsocket = (SSLSocket)factory.createSocket(socket, sockHost, sockPort, true);
+        sslsocket.setUseClientMode(true);
+        
+        // Get the protocols and ciphers supported by this JVM    
+        Set<String> protocols = new HashSet<String>(Arrays.asList(sslsocket.getSupportedProtocols()));
+        Set<String> ciphers = new HashSet<String>(Arrays.asList(sslsocket.getSupportedCipherSuites()));
+        
+        // Intersect with preferred protocols and ciphers
+        protocols.retainAll(http.getTlsPreferredProtocols());
+        ciphers.retainAll(http.getTlsPreferredCipherSuites());
+        
+        sslsocket.setEnabledProtocols(protocols.toArray(new String[protocols.size()]));
+        sslsocket.setEnabledCipherSuites(ciphers.toArray(new String[ciphers.size()]));
+        
+        sslsocket.startHandshake();
+        socket = sslsocket;
+      }
+      
       conf = http.getConf();
       if (sockAddr != null
           && conf.getBoolean("store.ip.address", false) == true) {