You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/09/22 21:52:28 UTC

svn commit: r1525420 [1/3] - in /manifoldcf/trunk: ./ connectors/sharepoint/ connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/ framework/core/src/main/java/org/apache/manifoldcf/core/common/ framework/pu...

Author: kwright
Date: Sun Sep 22 19:52:28 2013
New Revision: 1525420

URL: http://svn.apache.org/r1525420
Log:
Fix for CONNECTORS-778.

Modified:
    manifoldcf/trunk/   (props changed)
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/connectors/sharepoint/   (props changed)
    manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
    manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
    manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/common/DateParser.java
    manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java
    manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IProcessActivity.java
    manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java

Propchange: manifoldcf/trunk/
------------------------------------------------------------------------------
  Merged /manifoldcf/branches/CONNECTORS-778:r1524564-1525413

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1525420&r1=1525419&r2=1525420&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Sun Sep 22 19:52:28 2013
@@ -3,6 +3,10 @@ $Id$
 
 ======================= 1.4-dev =====================
 
+CONNECTORS-778: Add support for attachments in SharePoint
+connector.
+(Dmitry Goldenberg, Karl Wright)
+
 CONNECTORS-772: SharePoint connector wasn't working against
 certain SharePoint instances.  The reason was that some paths
 were relative and some absolute.  Fixed ALL path management to

Propchange: manifoldcf/trunk/connectors/sharepoint/
------------------------------------------------------------------------------
  Merged /manifoldcf/branches/CONNECTORS-778/connectors/sharepoint:r1524564-1525413

Modified: manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java?rev=1525420&r1=1525419&r2=1525420&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java (original)
+++ manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java Sun Sep 22 19:52:28 2013
@@ -1531,6 +1531,125 @@ public class SPSProxyHelper {
     }
   }
 
+  /** Gets a list of attachment URLs, given a site, list name, and list item ID.  These will be returned
+  * as name/value pairs; the "name" is the name of the attachment, and the "value" is the full URL.
+  */
+  public List<NameValue> getAttachmentNames( String site, String listName, String itemID )
+    throws ManifoldCFException, ServiceInterruption
+  {
+    long currentTime;
+    try
+    {
+      ArrayList<NameValue> result = new ArrayList<NameValue>();
+      
+      if (Logging.connectors.isDebugEnabled())
+        Logging.connectors.debug("SharePoint: In getAttachmentNames; site='"+site+"', listName='"+listName+"', itemID='"+itemID+"'");
+
+      // The docLibrary must be a GUID, because we don't have  title.
+
+      if ( site.compareTo( "/") == 0 )
+        site = "";
+      ListsWS listService = new ListsWS( baseUrl + site, userName, password, configuration, httpClient );
+      ListsSoap listCall = listService.getListsSoapHandler();
+
+      GetAttachmentCollectionResponseGetAttachmentCollectionResult listResponse =
+        listCall.getAttachmentCollection( listName, itemID );
+      org.apache.axis.message.MessageElement[] List = listResponse.get_any();
+
+      XMLDoc doc = new XMLDoc( List[0].toString() );
+      ArrayList nodeList = new ArrayList();
+
+      doc.processPath(nodeList, "*", null);
+      if (nodeList.size() != 1)
+      {
+        throw new ManifoldCFException("Bad xml - missing outer node - there are "+Integer.toString(nodeList.size())+" nodes");
+      }
+
+      Object attachments = nodeList.get(0);
+      if ( !doc.getNodeName(attachments).equals("ns1:Attachments") )
+        throw new ManifoldCFException( "Bad xml - outer node '" + doc.getNodeName(attachments) + "' is not 'ns1:Attachments'");
+
+      nodeList.clear();
+      doc.processPath(nodeList, "*", attachments);
+
+      int i = 0;
+      while (i < nodeList.size())
+      {
+        Object o = nodeList.get( i++ );
+        if ( !doc.getNodeName(o).equals("ns1:Attachment") )
+          throw new ManifoldCFException( "Bad xml - inner node '" + doc.getNodeName(o) + "' is not 'ns1:Attachment'");
+        String attachmentURL = doc.getData( o );
+        if (attachmentURL != null)
+        {
+          int index = attachmentURL.lastIndexOf("/");
+          if (index == -1)
+            throw new ManifoldCFException("Unexpected attachment URL form: '"+attachmentURL+"'");
+          result.add(new NameValue(attachmentURL.substring(index+1), new java.net.URL(attachmentURL).getPath()));
+        }
+      }
+
+      return result;
+    }
+    catch (java.net.MalformedURLException e)
+    {
+      throw new ManifoldCFException("Bad SharePoint url: "+e.getMessage(),e);
+    }
+    catch (javax.xml.rpc.ServiceException e)
+    {
+      if (Logging.connectors.isDebugEnabled())
+        Logging.connectors.debug("SharePoint: Got a service exception getting attachments for site "+site+" listName "+listName+" itemID "+itemID+" - retrying",e);
+      currentTime = System.currentTimeMillis();
+      throw new ServiceInterruption("Service exception: "+e.getMessage(), e, currentTime + 300000L,
+        currentTime + 12 * 60 * 60000L,-1,true);
+    }
+    catch (org.apache.axis.AxisFault e)
+    {
+      if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://xml.apache.org/axis/","HTTP")))
+      {
+        org.w3c.dom.Element elem = e.lookupFaultDetail(new javax.xml.namespace.QName("http://xml.apache.org/axis/","HttpErrorCode"));
+        if (elem != null)
+        {
+          elem.normalize();
+          String httpErrorCode = elem.getFirstChild().getNodeValue().trim();
+          if (httpErrorCode.equals("404"))
+            return null;
+          else if (httpErrorCode.equals("403"))
+            throw new ManifoldCFException("Remote procedure exception: "+e.getMessage(),e);
+          else if (httpErrorCode.equals("401"))
+          {
+            if (Logging.connectors.isDebugEnabled())
+              Logging.connectors.debug("SharePoint: Crawl user does not have sufficient privileges to get attachment list for site "+site+" listName "+listName+" itemID "+itemID+" - skipping",e);
+            return null;
+          }
+          throw new ManifoldCFException("Unexpected http error code "+httpErrorCode+" accessing SharePoint at "+baseUrl+site+": "+e.getMessage(),e);
+        }
+        throw new ManifoldCFException("Unknown http error occurred: "+e.getMessage(),e);
+      }
+
+      if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://schemas.xmlsoap.org/soap/envelope/","Server.userException")))
+      {
+        String exceptionName = e.getFaultString();
+        if (exceptionName.equals("java.lang.InterruptedException"))
+          throw new ManifoldCFException("Interrupted",ManifoldCFException.INTERRUPTED);
+      }
+
+      // I don't know if this is what you get when the library is missing, but here's hoping.
+      if (e.getMessage().indexOf("List does not exist") != -1)
+        return null;
+
+      if (Logging.connectors.isDebugEnabled())
+        Logging.connectors.debug("SharePoint: Got a remote exception getting attachments for site "+site+" listName "+listName+" itemID "+itemID+" - retrying",e);
+      currentTime = System.currentTimeMillis();
+      throw new ServiceInterruption("Remote procedure exception: "+e.getMessage(), e, currentTime + 300000L,
+        currentTime + 3 * 60 * 60000L,-1,false);
+    }
+    catch (java.rmi.RemoteException e)
+    {
+      throw new ManifoldCFException("Unexpected remote exception occurred: "+e.getMessage(),e);
+    }
+
+  }
+  
   /**
   * Gets a list of field names of the given document library
   * @param site
@@ -1550,8 +1669,9 @@ public class SPSProxyHelper {
 
       // The docLibrary must be a GUID, because we don't have  title.
 
-      if ( site.compareTo( "/") == 0 ) site = "";
-        ListsWS listService = new ListsWS( baseUrl + site, userName, password, configuration, httpClient );
+      if ( site.compareTo( "/") == 0 )
+        site = "";
+      ListsWS listService = new ListsWS( baseUrl + site, userName, password, configuration, httpClient );
       ListsSoap listCall = listService.getListsSoapHandler();
 
       GetListResponseGetListResult listResponse = listCall.getList( listName );
@@ -1666,13 +1786,13 @@ public class SPSProxyHelper {
   * @param docId
   * @return set of the field values
   */
-  public Map getFieldValues( ArrayList fieldNames, String site, String docLibrary, String docId, boolean dspStsWorks )
+  public Map<String,String> getFieldValues( ArrayList fieldNames, String site, String docLibrary, String docId, boolean dspStsWorks )
     throws ManifoldCFException, ServiceInterruption
   {
     long currentTime;
     try
     {
-      HashMap result = new HashMap();
+      HashMap<String,String> result = new HashMap<String,String>();
 
       if ( site.compareTo("/") == 0 ) site = ""; // root case
 
@@ -1861,7 +1981,7 @@ public class SPSProxyHelper {
           String attrValue = doc.getValue(o,"ows_"+(String)attrName);
           if (attrValue != null)
           {
-            result.put(attrName,valueMunge(attrValue));
+            result.put(attrName.toString(),valueMunge(attrValue));
           }
         }
       }