You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2012/11/10 15:37:38 UTC

svn commit: r1407825 [2/2] - in /manifoldcf/branches/CONNECTORS-120/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint: CommonsHTTPSender.java SPSProxyHelper.java SharePointRepository.java

Modified: manifoldcf/branches/CONNECTORS-120/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-120/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java?rev=1407825&r1=1407824&r2=1407825&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-120/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java (original)
+++ manifoldcf/branches/CONNECTORS-120/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java Sat Nov 10 14:37:37 2012
@@ -34,13 +34,26 @@ import java.util.Locale;
 import java.util.List;
 import java.util.Set;
 import java.util.HashSet;
+import java.util.concurrent.TimeUnit;
 import java.net.*;
 
-import org.apache.commons.httpclient.*;
-import org.apache.commons.httpclient.methods.*;
-import org.apache.commons.httpclient.auth.*;
-import org.apache.commons.httpclient.params.*;
-import org.apache.commons.httpclient.protocol.*;
+import org.apache.http.conn.ClientConnectionManager;
+import org.apache.http.client.HttpClient;
+import org.apache.http.impl.conn.PoolingClientConnectionManager;
+import org.apache.http.conn.scheme.Scheme;
+import org.apache.http.conn.ssl.SSLSocketFactory;
+import org.apache.http.conn.ssl.BrowserCompatHostnameVerifier;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.http.auth.AuthScope;
+import org.apache.http.auth.NTCredentials;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.impl.client.DefaultRedirectStrategy;
+import org.apache.http.util.EntityUtils;
+import org.apache.http.params.BasicHttpParams;
+import org.apache.http.params.HttpParams;
+import org.apache.http.params.CoreConnectionPNames;
 
 
 /** This is the "repository connector" for Microsoft SharePoint.
@@ -87,9 +100,9 @@ public class SharePointRepository extend
   // SSL support
   private String keystoreData = null;
   private IKeystoreManager keystoreManager = null;
-  private SharepointSecureSocketFactory secureSocketFactory = null;
-  private ProtocolFactory myFactory = null;
-  private MultiThreadedHttpConnectionManager connectionManager = null;
+  
+  private ClientConnectionManager connectionManager = null;
+  private HttpClient httpClient = null;
 
   // Current host name
   private static String currentHost = null;
@@ -187,24 +200,39 @@ public class SharePointRepository extend
 
       // Set up ssl if indicated
       keystoreData = params.getParameter( "keystore" );
-      myFactory = new ProtocolFactory();
+
+      PoolingClientConnectionManager localConnectionManager = new PoolingClientConnectionManager();
+      localConnectionManager.setMaxTotal(1);
+      connectionManager = localConnectionManager;
 
       if (keystoreData != null)
       {
         keystoreManager = KeystoreManagerFactory.make("",keystoreData);
-        secureSocketFactory = new SharepointSecureSocketFactory(keystoreManager.getSecureSocketFactory());
-        Protocol myHttpsProtocol = new Protocol("https", (ProtocolSocketFactory)secureSocketFactory, 443);
-        myFactory.registerProtocol("https",myHttpsProtocol);
+        SSLSocketFactory myFactory = new SSLSocketFactory(keystoreManager.getSecureSocketFactory(), new BrowserCompatHostnameVerifier());
+        Scheme myHttpsProtocol = new Scheme("https", 443, myFactory);
+        connectionManager.getSchemeRegistry().register(myHttpsProtocol);
       }
 
-      connectionManager = new MultiThreadedHttpConnectionManager();
-      connectionManager.getParams().setMaxTotalConnections(1);
-
       fileBaseUrl = serverUrl + encodedServerLocation;
 
+      BasicHttpParams params = new BasicHttpParams();
+      params.setBooleanParameter(CoreConnectionPNames.TCP_NODELAY,true);
+      params.setBooleanParameter(CoreConnectionPNames.STALE_CONNECTION_CHECK,false);
+      params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT,60000);
+      DefaultHttpClient localHttpClient = new DefaultHttpClient(connectionManager,params);
+      localHttpClient.setRedirectStrategy(new DefaultRedirectStrategy());
+      if (strippedUserName != null)
+      {
+        localHttpClient.getCredentialsProvider().setCredentials(
+          new AuthScope(serverName,serverPort),
+          new NTCredentials(strippedUserName, password, currentHost, ntlmDomain));
+      }
+
+      httpClient = localHttpClient;
+      
       proxy = new SPSProxyHelper( serverUrl, encodedServerLocation, serverLocation, userName, password,
-        myFactory, getClass(), "sharepoint-client-config.wsdd",
-        connectionManager );
+        getClass(), "sharepoint-client-config.wsdd",
+        httpClient );
       
     }
     sessionTimeout = System.currentTimeMillis() + sessionExpirationInterval;
@@ -225,10 +253,9 @@ public class SharePointRepository extend
 
     keystoreData = null;
     keystoreManager = null;
-    secureSocketFactory = null;
-    myFactory = null;
 
     proxy = null;
+    httpClient = null;
     if (connectionManager != null)
       connectionManager.shutdown();
     connectionManager = null;
@@ -275,10 +302,9 @@ public class SharePointRepository extend
 
     keystoreData = null;
     keystoreManager = null;
-    secureSocketFactory = null;
-    myFactory = null;
 
     proxy = null;
+    httpClient = null;
     if (connectionManager != null)
       connectionManager.shutdown();
     connectionManager = null;
@@ -354,7 +380,7 @@ public class SharePointRepository extend
     if (proxy != null && System.currentTimeMillis() >= sessionTimeout)
       expireSession();
     if (connectionManager != null)
-      connectionManager.closeIdleConnections(60000L);
+      connectionManager.closeIdleConnections(60000L,TimeUnit.MILLISECONDS);
   }
 
   /** Request arbitrary connector information.
@@ -1357,6 +1383,7 @@ public class SharePointRepository extend
               if (Logging.connectors.isDebugEnabled())
                 Logging.connectors.debug( "SharePoint: Processing file '"+documentIdentifier+"'; url: '" + fileUrl + "'" );
 
+
               // Set stuff up for fetch activity logging
               long startFetchTime = System.currentTimeMillis();
               try
@@ -1369,119 +1396,49 @@ public class SharePointRepository extend
                   OutputStream os = new FileOutputStream(tempFile);
                   try
                   {
-                    // Read the document.
+                    // Catch all exceptions having to do with reading the document
                     try
                     {
-                      HttpClient httpClient = new HttpClient(connectionManager);
-                      HostConfiguration clientConf = new HostConfiguration();
-                      clientConf.setParams(new HostParams());
-                      clientConf.setHost(serverName,serverPort,myFactory.getProtocol(serverProtocol));
-
-                      Credentials credentials;
-                      if (strippedUserName != null)
-                        credentials =  new NTCredentials(strippedUserName, password, currentHost, ntlmDomain);
-                      else
-                        credentials = null;
-
-                      if (credentials != null)
-                        httpClient.getState().setCredentials(new AuthScope(serverName,serverPort,null),
-                          credentials);
-
-                      HttpMethodBase method = new GetMethod( encodedServerLocation + encodedDocumentPath );
-                      try
+                      ExecuteMethodThread emt = new ExecuteMethodThread(httpClient,
+                        serverUrl + encodedServerLocation + encodedDocumentPath, os);
+                      emt.start();
+                      emt.join();
+                      Throwable t = emt.getException();
+                      if (t instanceof InterruptedException)
+                        throw (InterruptedException)t;
+                      if (t instanceof IOException)
+                        throw (IOException)t;
+                      else if (t instanceof Error)
+                        throw (Error)t;
+                      else if (t instanceof org.apache.http.HttpException)
+                        throw (org.apache.http.HttpException)t;
+                      else if (t instanceof RuntimeException)
+                        throw (RuntimeException)t;
+                      
+                      int returnCode = emt.getResponse();
+                        
+                      if (returnCode == 404 || returnCode == 401 || returnCode == 400)
                       {
-                        // Set up SSL using our keystore
-                        method.getParams().setParameter("http.socket.timeout", new Integer(60000));
-
-                        int returnCode;
-                        ExecuteMethodThread t = new ExecuteMethodThread(httpClient,clientConf,method);
-                        try
-                        {
-                          t.start();
-                          t.join();
-                          Throwable thr = t.getException();
-                          if (thr != null)
-                          {
-                            if (thr instanceof IOException)
-                              throw (IOException)thr;
-                            if (thr instanceof RuntimeException)
-                              throw (RuntimeException)thr;
-                            else
-                              throw (Error)thr;
-                          }
-                          returnCode = t.getResponse();
-                        }
-                        catch (InterruptedException e)
-                        {
-                          t.interrupt();
-                          // We need the caller to abandon any connections left around, so rethrow in a way that forces them to process the event properly.
-                          method = null;
-                          throw e;
-                        }
-                        if (returnCode == HttpStatus.SC_NOT_FOUND || returnCode == HttpStatus.SC_UNAUTHORIZED || returnCode == HttpStatus.SC_BAD_REQUEST)
-                        {
-                          // Well, sharepoint thought the document was there, but it really isn't, so delete it.
-                          if (Logging.connectors.isDebugEnabled())
-                            Logging.connectors.debug("SharePoint: Document at '"+encodedServerLocation+encodedDocumentPath+"' failed to fetch with code "+Integer.toString(returnCode)+", deleting");
-                          activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                            null,documentIdentifier,"Not found",Integer.toString(returnCode),null);
-                          activities.deleteDocument(documentIdentifier,version);
-                          i++;
-                          continue;
-                        }
-                        if (returnCode != HttpStatus.SC_OK)
-                        {
-                          activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                            null,documentIdentifier,"Error","Http status "+Integer.toString(returnCode),null);
-                          throw new ManifoldCFException("Error fetching document '"+fileUrl+"': "+Integer.toString(returnCode));
-                        }
-
-                        // int contentSize = (int)method.getResponseContentLength();
-                        InputStream is = method.getResponseBodyAsStream();
-                        try
-                        {
-                          byte[] transferBuffer = new byte[65536];
-                          while (true)
-                          {
-                            int amt = is.read(transferBuffer);
-                            if (amt == -1)
-                              break;
-                            os.write(transferBuffer,0,amt);
-                          }
-                        }
-                        finally
-                        {
-                          try
-                          {
-                            is.close();
-                          }
-                          catch (java.net.SocketTimeoutException e)
-                          {
-                            Logging.connectors.warn("SharePoint: Socket timeout error closing connection to file '"+fileUrl+"': "+e.getMessage(),e);
-                          }
-                          catch (org.apache.commons.httpclient.ConnectTimeoutException e)
-                          {
-                            Logging.connectors.warn("SharePoint: Connect timeout error closing connection to file '"+fileUrl+"': "+e.getMessage(),e);
-                          }
-                          catch (InterruptedIOException e)
-                          {
-                            throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-                          }
-                          catch (IOException e)
-                          {
-                            Logging.connectors.warn("SharePoint: Error closing connection to file '"+fileUrl+"': "+e.getMessage(),e);
-                          }
-                        }
+                        // Well, sharepoint thought the document was there, but it really isn't, so delete it.
+                        if (Logging.connectors.isDebugEnabled())
+                          Logging.connectors.debug("SharePoint: Document at '"+encodedServerLocation+encodedDocumentPath+"' failed to fetch with code "+Integer.toString(returnCode)+", deleting");
+                        activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+                          null,documentIdentifier,"Not found",Integer.toString(returnCode),null);
+                        activities.deleteDocument(documentIdentifier,version);
+                        i++;
+                        continue;
                       }
-                      finally
+                      else if (returnCode != 200)
                       {
-                        if (method != null)
-                          method.releaseConnection();
+                        activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+                          null,documentIdentifier,"Error","Http status "+Integer.toString(returnCode),null);
+                        throw new ManifoldCFException("Error fetching document '"+fileUrl+"': "+Integer.toString(returnCode));
                       }
 
                       // Log the normal fetch activity
                       activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
                         new Long(tempFile.length()),documentIdentifier,"Success",null,null);
+
                     }
                     catch (InterruptedException e)
                     {
@@ -1496,7 +1453,7 @@ public class SharePointRepository extend
                       throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
                         currentTime + 12 * 60 * 60000L,-1,true);
                     }
-                    catch (org.apache.commons.httpclient.ConnectTimeoutException e)
+                    catch (org.apache.http.conn.ConnectTimeoutException e)
                     {
                       activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
                         new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
@@ -1516,7 +1473,7 @@ public class SharePointRepository extend
                         new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
                       throw new ManifoldCFException("SharePoint: Illegal argument: "+e.getMessage(),e);
                     }
-                    catch (HttpException e)
+                    catch (org.apache.http.HttpException e)
                     {
                       Logging.connectors.warn("SharePoint: HttpException thrown",e);
                       activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
@@ -1540,6 +1497,7 @@ public class SharePointRepository extend
                     os.close();
                   }
                   
+                  // Ingest the document
                   long documentLength = tempFile.length();
                   if (activities.checkLengthIndexable(documentLength))
                   {
@@ -4828,27 +4786,59 @@ public class SharePointRepository extend
 
   protected static class ExecuteMethodThread extends Thread
   {
-    protected HttpClient client;
-    protected HostConfiguration hostConfiguration;
-    protected HttpMethodBase executeMethod;
+    protected final HttpClient httpClient;
+    protected final String url;
+    protected final OutputStream os;
+
     protected Throwable exception = null;
-    protected int rval = 0;
+    protected int returnCode = 0;
 
-    public ExecuteMethodThread(HttpClient client, HostConfiguration hostConfiguration, HttpMethodBase executeMethod)
+    public ExecuteMethodThread( HttpClient httpClient, String url, OutputStream os )
     {
       super();
       setDaemon(true);
-      this.client = client;
-      this.hostConfiguration = hostConfiguration;
-      this.executeMethod = executeMethod;
+      this.httpClient = httpClient;
+      this.url = url;
+      this.os = os;
     }
 
     public void run()
     {
       try
       {
-        // Call the execute method appropriately
-        rval = client.executeMethod(hostConfiguration,executeMethod,null);
+        HttpGet method = new HttpGet( url );
+        // Try block to ensure that the connection gets cleaned up
+        try
+        {
+          // Begin the fetch
+          HttpResponse response = httpClient.execute(method);
+          returnCode = response.getStatusLine().getStatusCode();
+          
+          if (returnCode == 200)
+          {
+            // Process the data
+            HttpEntity entity = response.getEntity();
+            if (entity != null)
+            {
+              InputStream is = entity.getContent();
+              // Copy the response body to the output stream, one buffer at a time
+              byte[] transferBuffer = new byte[65536];
+              while (true)
+              {
+                int amt = is.read(transferBuffer);
+                if (amt == -1)
+                  break;
+                os.write(transferBuffer,0,amt);
+              }
+            }
+          }
+        }
+        finally
+        {
+          // Abort the request (this does not consume the remaining stream) so the connection is always given up
+          method.abort();
+        }
+
       }
       catch (Throwable e)
       {
@@ -4863,7 +4853,7 @@ public class SharePointRepository extend
 
     public int getResponse()
     {
-      return rval;
+      return returnCode;
     }
   }
 
@@ -5776,205 +5766,5 @@ public class SharePointRepository extend
     }
   }
 
-  /** Socket factory for our https implementation.
-  */
-  protected static class MySSLSocketFactory implements org.apache.commons.httpclient.protocol.SecureProtocolSocketFactory
-  {
-    protected javax.net.ssl.SSLSocketFactory thisSocketFactory = null;
-    protected IKeystoreManager keystore;
-
-    /** Constructor.  Pass the keystore.
-    */
-    public MySSLSocketFactory(IKeystoreManager keystore)
-      throws ManifoldCFException
-    {
-      this.keystore = keystore;
-      thisSocketFactory = keystore.getSecureSocketFactory();
-    }
-
-
-    public Socket createSocket(String host,
-      int port,
-      InetAddress clientHost,
-      int clientPort)
-      throws IOException, UnknownHostException
-    {
-      return thisSocketFactory.createSocket(host,
-        port,
-        clientHost,
-        clientPort);
-    }
-
-
-    public Socket createSocket(final String host,
-      final int port,
-      final InetAddress localAddress,
-      final int localPort,
-      final HttpConnectionParams params)
-      throws IOException, UnknownHostException, ConnectTimeoutException
-    {
-      if (params == null)
-      {
-        throw new IllegalArgumentException("Parameters may not be null");
-      }
-      int timeout = params.getConnectionTimeout();
-      if (timeout == 0)
-      {
-        return createSocket(host, port, localAddress, localPort);
-      }
-      else
-      {
-        return createSocket(host, port, localAddress, localPort);
-
-        /*
-        return thisSocketFactory.createSocket(host,
-          port,
-          localAddress,
-          localPort,
-          timeout);
-        */
-      }
-    }
-
-    public Socket createSocket(String host, int port)
-      throws IOException, UnknownHostException
-    {
-      return thisSocketFactory.createSocket(host,port);
-    }
-
-    public Socket createSocket(Socket socket,
-      String host,
-      int port,
-      boolean autoClose)
-      throws IOException, UnknownHostException
-    {
-      return thisSocketFactory.createSocket(socket,
-        host,
-        port,
-        autoClose);
-    }
-
-
-    /** There's a socket factory per keystore;
-    * look at the keystore to do the comparison.
-    */
-    public boolean equals(Object obj)
-    {
-      if (obj == null || !(obj instanceof MySSLSocketFactory))
-        return false;
-      MySSLSocketFactory other = (MySSLSocketFactory)obj;
-      try
-      {
-        return keystore.getString().equals(other.keystore.getString());
-      }
-      catch (ManifoldCFException e)
-      {
-        return false;
-      }
-    }
-
-    public int hashCode()
-    {
-      try
-      {
-        return keystore.getString().hashCode();
-      }
-      catch (ManifoldCFException e)
-      {
-        return 0;
-      }
-    }
-
-
-  }
-
-  /** HTTPClient secure socket factory, which implements SecureProtocolSocketFactory
-  */
-  protected static class SharepointSecureSocketFactory implements SecureProtocolSocketFactory
-  {
-    /** This is the javax.net socket factory.
-    */
-    protected javax.net.ssl.SSLSocketFactory socketFactory;
-
-    /** Constructor */
-    public SharepointSecureSocketFactory(javax.net.ssl.SSLSocketFactory socketFactory)
-    {
-      this.socketFactory = socketFactory;
-    }
-
-    public Socket createSocket(
-      String host,
-      int port,
-      InetAddress clientHost,
-      int clientPort)
-      throws IOException, UnknownHostException
-    {
-      return socketFactory.createSocket(
-        host,
-        port,
-        clientHost,
-        clientPort
-      );
-    }
-
-    public Socket createSocket(
-      final String host,
-      final int port,
-      final InetAddress localAddress,
-      final int localPort,
-      final HttpConnectionParams params
-    ) throws IOException, UnknownHostException, ConnectTimeoutException
-    {
-      if (params == null)
-      {
-        throw new IllegalArgumentException("Parameters may not be null");
-      }
-      int timeout = params.getConnectionTimeout();
-      if (timeout == 0)
-      {
-        return createSocket(host, port, localAddress, localPort);
-      }
-      else
-        throw new IllegalArgumentException("This implementation does not handle non-zero connection timeouts");
-    }
-
-    public Socket createSocket(String host, int port)
-      throws IOException, UnknownHostException
-    {
-      return socketFactory.createSocket(
-        host,
-        port
-      );
-    }
-
-    public Socket createSocket(
-      Socket socket,
-      String host,
-      int port,
-      boolean autoClose)
-      throws IOException, UnknownHostException
-    {
-      return socketFactory.createSocket(
-        socket,
-        host,
-        port,
-        autoClose
-      );
-    }
-
-    public boolean equals(Object obj)
-    {
-      if (obj == null || !(obj instanceof SharepointSecureSocketFactory))
-        return false;
-      // Each object is unique
-      return super.equals(obj);
-    }
-
-    public int hashCode()
-    {
-      return super.hashCode();
-    }
-
-  }
 
 }