You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2012/08/03 15:12:33 UTC

svn commit: r1368931 [1/2] - in /manifoldcf/branches/CONNECTORS-497/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint: SPSProxyHelper.java SharePointRepository.java

Author: kwright
Date: Fri Aug  3 13:12:33 2012
New Revision: 1368931

URL: http://svn.apache.org/viewvc?rev=1368931&view=rev
Log:
Create a distinction between lists and libraries in Sharepoint connector document identifiers, and separate SPSProxyHelper methods so that we can treat libraries and lists distinctly.

Modified:
    manifoldcf/branches/CONNECTORS-497/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
    manifoldcf/branches/CONNECTORS-497/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java

Modified: manifoldcf/branches/CONNECTORS-497/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-497/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java?rev=1368931&r1=1368930&r2=1368931&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-497/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java (original)
+++ manifoldcf/branches/CONNECTORS-497/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java Fri Aug  3 13:12:33 2012
@@ -807,7 +807,7 @@ public class SPSProxyHelper {
   /**
   *
   * @param parentSite
-  * @param docLibrary or list name
+  * @param docLibrary
   * @return document library ID
   * @throws ManifoldCFException
   * @throws ServiceInterruption
@@ -888,7 +888,167 @@ public class SPSProxyHelper {
             }
           }
         }
-        else if ( baseType.equals("0") )
+      }
+
+      // Not found - return null
+      return null;
+    }
+    catch (java.net.MalformedURLException e)
+    {
+      throw new ManifoldCFException("Bad SharePoint url: "+e.getMessage(),e);
+    }
+    catch (javax.xml.rpc.ServiceException e)
+    {
+      if (Logging.connectors.isDebugEnabled())
+        Logging.connectors.debug("SharePoint: Got a service exception getting the library ID for site "+parentSite+" library "+docLibrary+" - retrying",e);
+      currentTime = System.currentTimeMillis();
+      throw new ServiceInterruption("Service exception: "+e.getMessage(), e, currentTime + 300000L,
+        currentTime + 12 * 60 * 60000L,-1,true);
+    }
+    catch (org.apache.axis.AxisFault e)
+    {
+      currentTime = System.currentTimeMillis();
+      if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://xml.apache.org/axis/","HTTP")))
+      {
+        org.w3c.dom.Element elem = e.lookupFaultDetail(new javax.xml.namespace.QName("http://xml.apache.org/axis/","HttpErrorCode"));
+        if (elem != null)
+        {
+          elem.normalize();
+          String httpErrorCode = elem.getFirstChild().getNodeValue().trim();
+          if (httpErrorCode.equals("404"))
+          {
+            // Page did not exist
+            if (Logging.connectors.isDebugEnabled())
+              Logging.connectors.debug("SharePoint: The page at "+baseUrl+parentSite+" did not exist; assuming library deleted");
+            return null;
+          }
+          else if (httpErrorCode.equals("401"))
+          {
+            // User did not have permissions for this library to list libraries
+            if (Logging.connectors.isDebugEnabled())
+              Logging.connectors.debug("SharePoint: The crawl user did not have access to list libraries for "+baseUrl+parentSite+"; skipping");
+            return null;
+          }
+          else if (httpErrorCode.equals("403"))
+            throw new ManifoldCFException("Http error "+httpErrorCode+" while reading from "+baseUrl+parentSite+" - check IIS and SharePoint security settings! "+e.getMessage(),e);
+          else
+            throw new ManifoldCFException("Unexpected http error code "+httpErrorCode+" accessing SharePoint at "+baseUrl+parentSite+": "+e.getMessage(),e);
+        }
+        throw new ManifoldCFException("Unknown http error occurred: "+e.getMessage(),e);
+      }
+      else if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://schemas.xmlsoap.org/soap/envelope/","Server")))
+      {
+        org.w3c.dom.Element elem = e.lookupFaultDetail(new javax.xml.namespace.QName("http://schemas.microsoft.com/sharepoint/soap/","errorcode"));
+        if (elem != null)
+        {
+          elem.normalize();
+          String sharepointErrorCode = elem.getFirstChild().getNodeValue().trim();
+          if (sharepointErrorCode.equals("0x82000006"))
+          {
+            // List did not exist
+            if (Logging.connectors.isDebugEnabled())
+              Logging.connectors.debug("SharePoint: The list "+docLibrary+" in site "+parentSite+" did not exist; assuming library deleted");
+            return null;
+          }
+          else
+          {
+            if (Logging.connectors.isDebugEnabled())
+            {
+              org.w3c.dom.Element elem2 = e.lookupFaultDetail(new javax.xml.namespace.QName("http://schemas.microsoft.com/sharepoint/soap/","errorstring"));
+              String errorString = "";
+              if (elem != null)
+                errorString = elem2.getFirstChild().getNodeValue().trim();
+
+              Logging.connectors.debug("SharePoint: Getting library ID for the list "+docLibrary+" in site "+parentSite+" failed with unexpected SharePoint error code "+sharepointErrorCode+": "+errorString+" - Skipping",e);
+            }
+            return null;
+          }
+        }
+        if (Logging.connectors.isDebugEnabled())
+          Logging.connectors.debug("SharePoint: Unknown SharePoint server error getting library ID for site "+parentSite+" library "+docLibrary+" - axis fault = "+e.getFaultCode().getLocalPart()+", detail = "+e.getFaultString()+" - retrying",e);
+
+        throw new ServiceInterruption("Unknown SharePoint server error: "+e.getMessage()+" - retrying", e, currentTime + 300000L,
+          currentTime + 3 * 60 * 60000L,-1,false);
+      }
+
+      if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://schemas.xmlsoap.org/soap/envelope/","Server.userException")))
+      {
+        String exceptionName = e.getFaultString();
+        if (exceptionName.equals("java.lang.InterruptedException"))
+          throw new ManifoldCFException("Interrupted",ManifoldCFException.INTERRUPTED);
+      }
+
+      if (Logging.connectors.isDebugEnabled())
+        Logging.connectors.debug("SharePoint: Got an unknown remote exception getting library ID for site "+parentSite+" library "+docLibrary+" - axis fault = "+e.getFaultCode().getLocalPart()+", detail = "+e.getFaultString()+" - retrying",e);
+      throw new ServiceInterruption("Remote procedure exception: "+e.getMessage(), e, currentTime + 300000L,
+        currentTime + 3 * 60 * 60000L,-1,false);
+    }
+    catch (java.rmi.RemoteException e)
+    {
+      // We expect the axis exception to be thrown, not this generic one!
+      // So, fail hard if we see it.
+      if (Logging.connectors.isDebugEnabled())
+        Logging.connectors.debug("SharePoint: Got an unexpected remote exception getting library ID for site "+parentSite+" library "+docLibrary,e);
+      throw new ManifoldCFException("Unexpected remote procedure exception: "+e.getMessage(), e);
+    }
+  }
+
+  /**
+  *
+  * @param parentSite
+  * @param list name
+  * @return document library ID
+  * @throws ManifoldCFException
+  * @throws ServiceInterruption
+  */
+  public String getListID(String parentSite, String parentSiteDecoded, String listName)
+    throws ServiceInterruption, ManifoldCFException
+  {
+    long currentTime;
+    try
+    {
+      // The old code here used to call the lists service to find the guid, using the doc library url name as the title.
+      // This did not work when the title differed from the url name.
+      // On 5/8/2008 I modified the code to use the lists service to locate the correct record by matching the defaultViewUrl field,
+      // so that we instead iterate through the children.  It's more expensive but it works.
+      String parentSiteRequest = parentSite;
+
+      if ( parentSiteRequest.equals("/"))
+      {
+        parentSiteRequest = ""; // root case
+        parentSiteDecoded = "";
+      }
+
+      ListsWS listsService = new ListsWS( baseUrl + parentSiteRequest, userName, password, myFactory, configuration, connectionManager );
+      ListsSoap listsCall = listsService.getListsSoapHandler( );
+
+      GetListCollectionResponseGetListCollectionResult listResp = listsCall.getListCollection();
+      org.apache.axis.message.MessageElement[] lists = listResp.get_any();
+
+      XMLDoc doc = new XMLDoc( lists[0].toString() );
+      ArrayList nodeList = new ArrayList();
+
+      doc.processPath(nodeList, "*", null);
+      if (nodeList.size() != 1)
+      {
+        throw new ManifoldCFException("Bad xml - missing outer 'ns1:Lists' node - there are "+Integer.toString(nodeList.size())+" nodes");
+      }
+      Object parent = nodeList.get(0);
+      if (!doc.getNodeName(parent).equals("ns1:Lists"))
+        throw new ManifoldCFException("Bad xml - outer node is not 'ns1:Lists'");
+
+      nodeList.clear();
+      doc.processPath(nodeList, "*", parent);  // <ns1:Lists>
+
+      int chuckIndex = decodedServerLocation.length() + parentSiteDecoded.length();
+
+      int i = 0;
+      while (i < nodeList.size())
+      {
+        Object o = nodeList.get( i++ );
+
+        String baseType = doc.getValue( o, "BaseType");
+        if ( baseType.equals("0") )
         {
           // We think it's a list
 
@@ -916,7 +1076,7 @@ public class SPSProxyHelper {
               pathpart = urlPath.substring(index+1,k);
             }
 
-            if ( pathpart.equals(docLibrary) )
+            if ( pathpart.equals(listName) )
             {
               // We found it!
               // Return its ID
@@ -936,7 +1096,7 @@ public class SPSProxyHelper {
     catch (javax.xml.rpc.ServiceException e)
     {
       if (Logging.connectors.isDebugEnabled())
-        Logging.connectors.debug("SharePoint: Got a service exception getting the library ID for site "+parentSite+" list/library "+docLibrary+" - retrying",e);
+        Logging.connectors.debug("SharePoint: Got a service exception getting the list ID for site "+parentSite+" list "+listName+" - retrying",e);
       currentTime = System.currentTimeMillis();
       throw new ServiceInterruption("Service exception: "+e.getMessage(), e, currentTime + 300000L,
         currentTime + 12 * 60 * 60000L,-1,true);
@@ -955,14 +1115,14 @@ public class SPSProxyHelper {
           {
             // Page did not exist
             if (Logging.connectors.isDebugEnabled())
-              Logging.connectors.debug("SharePoint: The page at "+baseUrl+parentSite+" did not exist; assuming library deleted");
+              Logging.connectors.debug("SharePoint: The page at "+baseUrl+parentSite+" did not exist; assuming list deleted");
             return null;
           }
           else if (httpErrorCode.equals("401"))
           {
             // User did not have permissions for this library to list libraries
             if (Logging.connectors.isDebugEnabled())
-              Logging.connectors.debug("SharePoint: The crawl user did not have access to list libraries for "+baseUrl+parentSite+"; skipping");
+              Logging.connectors.debug("SharePoint: The crawl user did not have access to list lists for "+baseUrl+parentSite+"; skipping");
             return null;
           }
           else if (httpErrorCode.equals("403"))
@@ -983,7 +1143,7 @@ public class SPSProxyHelper {
           {
             // List did not exist
             if (Logging.connectors.isDebugEnabled())
-              Logging.connectors.debug("SharePoint: The list "+docLibrary+" in site "+parentSite+" did not exist; assuming library deleted");
+              Logging.connectors.debug("SharePoint: The list "+listName+" in site "+parentSite+" did not exist; assuming list deleted");
             return null;
           }
           else
@@ -995,13 +1155,13 @@ public class SPSProxyHelper {
               if (elem != null)
                 errorString = elem2.getFirstChild().getNodeValue().trim();
 
-              Logging.connectors.debug("SharePoint: Getting library ID for the list "+docLibrary+" in site "+parentSite+" failed with unexpected SharePoint error code "+sharepointErrorCode+": "+errorString+" - Skipping",e);
+              Logging.connectors.debug("SharePoint: Getting list ID for the list "+listName+" in site "+parentSite+" failed with unexpected SharePoint error code "+sharepointErrorCode+": "+errorString+" - Skipping",e);
             }
             return null;
           }
         }
         if (Logging.connectors.isDebugEnabled())
-          Logging.connectors.debug("SharePoint: Unknown SharePoint server error getting library ID for site "+parentSite+" library "+docLibrary+" - axis fault = "+e.getFaultCode().getLocalPart()+", detail = "+e.getFaultString()+" - retrying",e);
+          Logging.connectors.debug("SharePoint: Unknown SharePoint server error getting list ID for site "+parentSite+" list "+listName+" - axis fault = "+e.getFaultCode().getLocalPart()+", detail = "+e.getFaultString()+" - retrying",e);
 
         throw new ServiceInterruption("Unknown SharePoint server error: "+e.getMessage()+" - retrying", e, currentTime + 300000L,
           currentTime + 3 * 60 * 60000L,-1,false);
@@ -1015,7 +1175,7 @@ public class SPSProxyHelper {
       }
 
       if (Logging.connectors.isDebugEnabled())
-        Logging.connectors.debug("SharePoint: Got an unknown remote exception getting library ID for site "+parentSite+" library "+docLibrary+" - axis fault = "+e.getFaultCode().getLocalPart()+", detail = "+e.getFaultString()+" - retrying",e);
+        Logging.connectors.debug("SharePoint: Got an unknown remote exception getting list ID for site "+parentSite+" list "+listName+" - axis fault = "+e.getFaultCode().getLocalPart()+", detail = "+e.getFaultString()+" - retrying",e);
       throw new ServiceInterruption("Remote procedure exception: "+e.getMessage(), e, currentTime + 300000L,
         currentTime + 3 * 60 * 60000L,-1,false);
     }
@@ -1024,7 +1184,7 @@ public class SPSProxyHelper {
       // We expect the axis exception to be thrown, not this generic one!
       // So, fail hard if we see it.
       if (Logging.connectors.isDebugEnabled())
-        Logging.connectors.debug("SharePoint: Got an unexpected remote exception getting library ID for site "+parentSite+" library "+docLibrary,e);
+        Logging.connectors.debug("SharePoint: Got an unexpected remote exception getting list ID for site "+parentSite+" list "+listName,e);
       throw new ManifoldCFException("Unexpected remote procedure exception: "+e.getMessage(), e);
     }
   }
@@ -1938,7 +2098,7 @@ public class SPSProxyHelper {
   /**
   * Gets a list of document libraries given a parent site
   * @param parentSite the site to search for document libraries, empty string for root
-  * @return lists of NameValue objects, representing document libraries or lists
+  * @return lists of NameValue objects, representing document libraries
   */
   public ArrayList getDocumentLibraries( String parentSite, String parentSiteDecoded )
     throws ManifoldCFException, ServiceInterruption
@@ -2022,7 +2182,116 @@ public class SPSProxyHelper {
             }
           }
         }
-        else if ( baseType.equals( "0" ) )
+      }
+
+      return result;
+    }
+    catch (java.net.MalformedURLException e)
+    {
+      throw new ManifoldCFException("Bad SharePoint url: "+e.getMessage(),e);
+    }
+    catch (javax.xml.rpc.ServiceException e)
+    {
+      if (Logging.connectors.isDebugEnabled())
+        Logging.connectors.debug("SharePoint: Got a service exception getting document libraries for site "+parentSite+" - retrying",e);
+      currentTime = System.currentTimeMillis();
+      throw new ServiceInterruption("Service exception: "+e.getMessage(), e, currentTime + 300000L,
+        currentTime + 12 * 60 * 60000L,-1,true);
+    }
+    catch (org.apache.axis.AxisFault e)
+    {
+      if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://xml.apache.org/axis/","HTTP")))
+      {
+        org.w3c.dom.Element elem = e.lookupFaultDetail(new javax.xml.namespace.QName("http://xml.apache.org/axis/","HttpErrorCode"));
+        if (elem != null)
+        {
+          elem.normalize();
+          String httpErrorCode = elem.getFirstChild().getNodeValue().trim();
+          if (httpErrorCode.equals("404"))
+            return null;
+          else if (httpErrorCode.equals("403"))
+            throw new ManifoldCFException("Remote procedure exception: "+e.getMessage(),e);
+          else if (httpErrorCode.equals("401"))
+          {
+            if (Logging.connectors.isDebugEnabled())
+              Logging.connectors.debug("SharePoint: Crawl user does not have sufficient privileges to read document libraries for site "+parentSite+" - skipping",e);
+            return null;
+          }
+          throw new ManifoldCFException("Unexpected http error code "+httpErrorCode+" accessing SharePoint at "+baseUrl+parentSite+": "+e.getMessage(),e);
+        }
+        throw new ManifoldCFException("Unknown http error occurred: "+e.getMessage(),e);
+      }
+      if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://schemas.xmlsoap.org/soap/envelope/","Server.userException")))
+      {
+        String exceptionName = e.getFaultString();
+        if (exceptionName.equals("java.lang.InterruptedException"))
+          throw new ManifoldCFException("Interrupted",ManifoldCFException.INTERRUPTED);
+      }
+      if (Logging.connectors.isDebugEnabled())
+        Logging.connectors.debug("SharePoint: Got a remote exception reading document libraries for site "+parentSite+" - retrying",e);
+      currentTime = System.currentTimeMillis();
+      throw new ServiceInterruption("Remote procedure exception: "+e.getMessage(), e, currentTime + 300000L,
+        currentTime + 3 * 60 * 60000L,-1,false);
+    }
+    catch (java.rmi.RemoteException e)
+    {
+      throw new ManifoldCFException("Unexpected remote exception occurred: "+e.getMessage(),e);
+    }
+  }
+
+  /**
+  * Gets a list of lists given a parent site
+  * @param parentSite the site to search for lists, empty string for root
+  * @return lists of NameValue objects, representing lists
+  */
+  public ArrayList getLists( String parentSite, String parentSiteDecoded )
+    throws ManifoldCFException, ServiceInterruption
+  {
+    long currentTime;
+    try
+    {
+      ArrayList result = new ArrayList();
+
+      String parentSiteRequest = parentSite;
+
+      if ( parentSiteRequest.equals("/"))
+      {
+        parentSiteRequest = ""; // root case
+        parentSiteDecoded = "";
+      }
+
+      ListsWS listsService = new ListsWS( baseUrl + parentSiteRequest, userName, password, myFactory, configuration, connectionManager );
+      ListsSoap listsCall = listsService.getListsSoapHandler( );
+
+      GetListCollectionResponseGetListCollectionResult listResp = listsCall.getListCollection();
+      org.apache.axis.message.MessageElement[] lists = listResp.get_any();
+
+      //if ( parentSite.compareTo("/Sample2") == 0) System.out.println( lists[0].toString() );
+
+      XMLDoc doc = new XMLDoc( lists[0].toString() );
+      ArrayList nodeList = new ArrayList();
+
+      doc.processPath(nodeList, "*", null);
+      if (nodeList.size() != 1)
+      {
+        throw new ManifoldCFException("Bad xml - missing outer 'ns1:Lists' node - there are "+Integer.toString(nodeList.size())+" nodes");
+      }
+      Object parent = nodeList.get(0);
+      if (!doc.getNodeName(parent).equals("ns1:Lists"))
+        throw new ManifoldCFException("Bad xml - outer node is not 'ns1:Lists'");
+
+      nodeList.clear();
+      doc.processPath(nodeList, "*", parent);  // <ns1:Lists>
+
+      int chuckIndex = decodedServerLocation.length() + parentSiteDecoded.length();
+
+      int i = 0;
+      while (i < nodeList.size())
+      {
+        Object o = nodeList.get( i++ );
+
+        String baseType = doc.getValue( o, "BaseType");
+        if ( baseType.equals( "0" ) )
         {
           // We think it's a list
 
@@ -2077,7 +2346,7 @@ public class SPSProxyHelper {
     catch (javax.xml.rpc.ServiceException e)
     {
       if (Logging.connectors.isDebugEnabled())
-        Logging.connectors.debug("SharePoint: Got a service exception getting document libraries and lists for site "+parentSite+" - retrying",e);
+        Logging.connectors.debug("SharePoint: Got a service exception getting lists for site "+parentSite+" - retrying",e);
       currentTime = System.currentTimeMillis();
       throw new ServiceInterruption("Service exception: "+e.getMessage(), e, currentTime + 300000L,
         currentTime + 12 * 60 * 60000L,-1,true);
@@ -2098,7 +2367,7 @@ public class SPSProxyHelper {
           else if (httpErrorCode.equals("401"))
           {
             if (Logging.connectors.isDebugEnabled())
-              Logging.connectors.debug("SharePoint: Crawl user does not have sufficient privileges to read document libraries for site "+parentSite+" - skipping",e);
+              Logging.connectors.debug("SharePoint: Crawl user does not have sufficient privileges to read lists for site "+parentSite+" - skipping",e);
             return null;
           }
           throw new ManifoldCFException("Unexpected http error code "+httpErrorCode+" accessing SharePoint at "+baseUrl+parentSite+": "+e.getMessage(),e);
@@ -2112,7 +2381,7 @@ public class SPSProxyHelper {
           throw new ManifoldCFException("Interrupted",ManifoldCFException.INTERRUPTED);
       }
       if (Logging.connectors.isDebugEnabled())
-        Logging.connectors.debug("SharePoint: Got a remote exception reading document libraries for site "+parentSite+" - retrying",e);
+        Logging.connectors.debug("SharePoint: Got a remote exception reading lists for site "+parentSite+" - retrying",e);
       currentTime = System.currentTimeMillis();
       throw new ServiceInterruption("Remote procedure exception: "+e.getMessage(), e, currentTime + 300000L,
         currentTime + 3 * 60 * 60000L,-1,false);