You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/09/17 14:13:39 UTC
svn commit: r1524000 - in
/manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint:
SPSProxyHelper.java SharePointRepository.java
Author: kwright
Date: Tue Sep 17 12:13:39 2013
New Revision: 1524000
URL: http://svn.apache.org/r1524000
Log:
Change the way we deal with relative URLs so that they are handled consistently across all versions of SharePoint.
Modified:
manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
Modified: manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java?rev=1524000&r1=1523999&r2=1524000&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java (original)
+++ manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java Tue Sep 17 12:13:39 2013
@@ -589,21 +589,6 @@ public class SPSProxyHelper {
Object node = nodeDocs.get(j);
Logging.connectors.debug( node.toString() );
String relPath = docs.getData( docs.getElement( node, "FileRef" ) );
-
- // This relative path is apparently from the domain on down; if there's a location offset we therefore
- // need to get rid of it before checking the document against the site/library tuples. The recorded
- // document identifier should also not include it.
-
- if (!relPath.toLowerCase().startsWith(serverLocation.toLowerCase()))
- {
- // Unexpected processing error; the path to the folder or document did not start with the location
- // offset, so throw up.
- throw new ManifoldCFException("Internal error: Relative path '"+relPath+"' was expected to start with '"+
- serverLocation+"'");
- }
-
- relPath = relPath.substring(serverLocation.length());
-
fileStream.addFile( relPath );
}
}
@@ -649,9 +634,6 @@ public class SPSProxyHelper {
{
resultCount++;
String relPath = result.getAttribute("FileRef");
-
- relPath = "/" + relPath;
-
fileStream.addFile( relPath );
}
}
Modified: manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java?rev=1524000&r1=1523999&r2=1524000&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java (original)
+++ manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java Tue Sep 17 12:13:39 2013
@@ -1406,13 +1406,10 @@ public class SharePointRepository extend
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug( "SharePoint: Document identifier is a library: '" + siteLibPath + "'" );
- // Calculate the start of the path part that would contain the folders/file
- int foldersFilePathIndex = encodedServerLocation.length() + site.length() + 1 + libName.length();
-
String libID = proxy.getDocLibID( encodePath(site), site, libName, fullListPaths );
if (libID != null)
{
- FileStream fs = new FileStream( activities, foldersFilePathIndex, spec );
+ FileStream fs = new FileStream( activities, encodedServerLocation, siteLibPath, spec );
boolean success = proxy.getChildren( fs, encodePath(site) , libID, dspStsWorks );
if (!success)
{
@@ -1874,35 +1871,54 @@ public class SharePointRepository extend
protected class FileStream implements IFileStream
{
- protected IProcessActivity activities;
- protected int foldersFilePathIndex;
- protected DocumentSpecification spec;
+ protected final IProcessActivity activities;
+ protected final DocumentSpecification spec;
+ protected final String rootPath;
+ protected final String siteLibPath;
- public FileStream(IProcessActivity activities, int foldersFilePathIndex, DocumentSpecification spec)
+ public FileStream(IProcessActivity activities, String rootPath, String siteLibPath, DocumentSpecification spec)
{
this.activities = activities;
- this.foldersFilePathIndex = foldersFilePathIndex;
this.spec = spec;
+ this.rootPath = rootPath;
+ this.siteLibPath = siteLibPath;
}
public void addFile(String relPath)
throws ManifoldCFException
{
- if ( checkIncludeFile( relPath, spec ) )
+
+ // First, convert the relative path to a full path
+ if ( !relPath.startsWith("/") )
{
- // Since the processing for a file needs to know the library path, we need a way to signal the cutoff between library and folder levels.
- // The way I've chosen to do this is to use a double slash at that point, as a separator.
- if (relPath.length() >= foldersFilePathIndex)
+ relPath = rootPath + siteLibPath + "/" + relPath;
+ }
+
+ // Now, strip away what we don't want - namely, the root path. This makes the path relative to the root.
+ if ( relPath.length() >= rootPath.length() )
+ {
+ relPath = relPath.substring(rootPath.length());
+
+ if ( checkIncludeFile( relPath, spec ) )
{
- String modifiedPath = relPath.substring(0,foldersFilePathIndex) + "/" + relPath.substring(foldersFilePathIndex);
+ // Since the processing for a file needs to know the library path, we need a way to signal the cutoff between library and folder levels.
+ // The way I've chosen to do this is to use a double slash at that point, as a separator.
+ if (relPath.length() >= siteLibPath.length())
+ {
+ String modifiedPath = relPath.substring(0,siteLibPath.length()) + "/" + relPath.substring(siteLibPath.length());
- activities.addDocumentReference( modifiedPath );
- }
- else
- {
- Logging.connectors.warn("Sharepoint: Unexpected relPath structure; path is '"+relPath+"', but expected <list/library> length of "+foldersFilePathIndex);
+ activities.addDocumentReference( modifiedPath );
+ }
+ else
+ {
+ Logging.connectors.warn("SharePoint: Unexpected relPath structure; path is '"+relPath+"', but expected to see something beginning with "+siteLibPath);
+ }
}
}
+ else
+ {
+ Logging.connectors.warn("SharePoint: Unexpected relPath structure; path is "+relPath+", but expected to see something beginning with "+rootPath);
+ }
}
}