You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/09/22 21:52:28 UTC
svn commit: r1525420 [1/3] - in /manifoldcf/trunk: ./ connectors/sharepoint/
connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/
framework/core/src/main/java/org/apache/manifoldcf/core/common/
framework/pu...
Author: kwright
Date: Sun Sep 22 19:52:28 2013
New Revision: 1525420
URL: http://svn.apache.org/r1525420
Log:
Fix for CONNECTORS-778.
Modified:
manifoldcf/trunk/ (props changed)
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/connectors/sharepoint/ (props changed)
manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/common/DateParser.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IProcessActivity.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
Propchange: manifoldcf/trunk/
------------------------------------------------------------------------------
Merged /manifoldcf/branches/CONNECTORS-778:r1524564-1525413
Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1525420&r1=1525419&r2=1525420&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Sun Sep 22 19:52:28 2013
@@ -3,6 +3,10 @@ $Id$
======================= 1.4-dev =====================
+CONNECTORS-778: Add support for attachments in SharePoint
+connector.
+(Dmitry Goldenberg, Karl Wright)
+
CONNECTORS-772: SharePoint connector wasn't working against
certain SharePoint instances. The reason was that some paths
were relative and some absolute. Fixed ALL path management to
Propchange: manifoldcf/trunk/connectors/sharepoint/
------------------------------------------------------------------------------
Merged /manifoldcf/branches/CONNECTORS-778/connectors/sharepoint:r1524564-1525413
Modified: manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java?rev=1525420&r1=1525419&r2=1525420&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java (original)
+++ manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java Sun Sep 22 19:52:28 2013
@@ -1531,6 +1531,125 @@ public class SPSProxyHelper {
}
}
+ /** Gets a list of attachment URLs, given a site, list name, and list item ID. These will be returned
+ * as name/value pairs; the "name" is the name of the attachment, and the "value" is the full URL.
+ */
+ public List<NameValue> getAttachmentNames( String site, String listName, String itemID )
+ throws ManifoldCFException, ServiceInterruption
+ {
+ long currentTime;
+ try
+ {
+ ArrayList<NameValue> result = new ArrayList<NameValue>();
+
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("SharePoint: In getAttachmentNames; site='"+site+"', listName='"+listName+"', itemID='"+itemID+"'");
+
+ // The docLibrary must be a GUID, because we don't have title.
+
+ if ( site.compareTo( "/") == 0 )
+ site = "";
+ ListsWS listService = new ListsWS( baseUrl + site, userName, password, configuration, httpClient );
+ ListsSoap listCall = listService.getListsSoapHandler();
+
+ GetAttachmentCollectionResponseGetAttachmentCollectionResult listResponse =
+ listCall.getAttachmentCollection( listName, itemID );
+ org.apache.axis.message.MessageElement[] List = listResponse.get_any();
+
+ XMLDoc doc = new XMLDoc( List[0].toString() );
+ ArrayList nodeList = new ArrayList();
+
+ doc.processPath(nodeList, "*", null);
+ if (nodeList.size() != 1)
+ {
+ throw new ManifoldCFException("Bad xml - missing outer node - there are "+Integer.toString(nodeList.size())+" nodes");
+ }
+
+ Object attachments = nodeList.get(0);
+ if ( !doc.getNodeName(attachments).equals("ns1:Attachments") )
+ throw new ManifoldCFException( "Bad xml - outer node '" + doc.getNodeName(attachments) + "' is not 'ns1:Attachments'");
+
+ nodeList.clear();
+ doc.processPath(nodeList, "*", attachments);
+
+ int i = 0;
+ while (i < nodeList.size())
+ {
+ Object o = nodeList.get( i++ );
+ if ( !doc.getNodeName(o).equals("ns1:Attachment") )
+ throw new ManifoldCFException( "Bad xml - inner node '" + doc.getNodeName(o) + "' is not 'ns1:Attachment'");
+ String attachmentURL = doc.getData( o );
+ if (attachmentURL != null)
+ {
+ int index = attachmentURL.lastIndexOf("/");
+ if (index == -1)
+ throw new ManifoldCFException("Unexpected attachment URL form: '"+attachmentURL+"'");
+ result.add(new NameValue(attachmentURL.substring(index+1), new java.net.URL(attachmentURL).getPath()));
+ }
+ }
+
+ return result;
+ }
+ catch (java.net.MalformedURLException e)
+ {
+ throw new ManifoldCFException("Bad SharePoint url: "+e.getMessage(),e);
+ }
+ catch (javax.xml.rpc.ServiceException e)
+ {
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("SharePoint: Got a service exception getting attachments for site "+site+" listName "+listName+" itemID "+itemID+" - retrying",e);
+ currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("Service exception: "+e.getMessage(), e, currentTime + 300000L,
+ currentTime + 12 * 60 * 60000L,-1,true);
+ }
+ catch (org.apache.axis.AxisFault e)
+ {
+ if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://xml.apache.org/axis/","HTTP")))
+ {
+ org.w3c.dom.Element elem = e.lookupFaultDetail(new javax.xml.namespace.QName("http://xml.apache.org/axis/","HttpErrorCode"));
+ if (elem != null)
+ {
+ elem.normalize();
+ String httpErrorCode = elem.getFirstChild().getNodeValue().trim();
+ if (httpErrorCode.equals("404"))
+ return null;
+ else if (httpErrorCode.equals("403"))
+ throw new ManifoldCFException("Remote procedure exception: "+e.getMessage(),e);
+ else if (httpErrorCode.equals("401"))
+ {
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("SharePoint: Crawl user does not have sufficient privileges to get attachment list for site "+site+" listName "+listName+" itemID "+itemID+" - skipping",e);
+ return null;
+ }
+ throw new ManifoldCFException("Unexpected http error code "+httpErrorCode+" accessing SharePoint at "+baseUrl+site+": "+e.getMessage(),e);
+ }
+ throw new ManifoldCFException("Unknown http error occurred: "+e.getMessage(),e);
+ }
+
+ if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://schemas.xmlsoap.org/soap/envelope/","Server.userException")))
+ {
+ String exceptionName = e.getFaultString();
+ if (exceptionName.equals("java.lang.InterruptedException"))
+ throw new ManifoldCFException("Interrupted",ManifoldCFException.INTERRUPTED);
+ }
+
+ // I don't know if this is what you get when the library is missing, but here's hoping.
+ if (e.getMessage().indexOf("List does not exist") != -1)
+ return null;
+
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("SharePoint: Got a remote exception getting attachments for site "+site+" listName "+listName+" itemID "+itemID+" - retrying",e);
+ currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("Remote procedure exception: "+e.getMessage(), e, currentTime + 300000L,
+ currentTime + 3 * 60 * 60000L,-1,false);
+ }
+ catch (java.rmi.RemoteException e)
+ {
+ throw new ManifoldCFException("Unexpected remote exception occurred: "+e.getMessage(),e);
+ }
+
+ }
+
/**
* Gets a list of field names of the given document library
* @param site
@@ -1550,8 +1669,9 @@ public class SPSProxyHelper {
// The docLibrary must be a GUID, because we don't have title.
- if ( site.compareTo( "/") == 0 ) site = "";
- ListsWS listService = new ListsWS( baseUrl + site, userName, password, configuration, httpClient );
+ if ( site.compareTo( "/") == 0 )
+ site = "";
+ ListsWS listService = new ListsWS( baseUrl + site, userName, password, configuration, httpClient );
ListsSoap listCall = listService.getListsSoapHandler();
GetListResponseGetListResult listResponse = listCall.getList( listName );
@@ -1666,13 +1786,13 @@ public class SPSProxyHelper {
* @param docId
* @return set of the field values
*/
- public Map getFieldValues( ArrayList fieldNames, String site, String docLibrary, String docId, boolean dspStsWorks )
+ public Map<String,String> getFieldValues( ArrayList fieldNames, String site, String docLibrary, String docId, boolean dspStsWorks )
throws ManifoldCFException, ServiceInterruption
{
long currentTime;
try
{
- HashMap result = new HashMap();
+ HashMap<String,String> result = new HashMap<String,String>();
if ( site.compareTo("/") == 0 ) site = ""; // root case
@@ -1861,7 +1981,7 @@ public class SPSProxyHelper {
String attrValue = doc.getValue(o,"ows_"+(String)attrName);
if (attrValue != null)
{
- result.put(attrName,valueMunge(attrValue));
+ result.put(attrName.toString(),valueMunge(attrValue));
}
}
}