You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/09/17 14:13:39 UTC

svn commit: r1524000 - in /manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint: SPSProxyHelper.java SharePointRepository.java

Author: kwright
Date: Tue Sep 17 12:13:39 2013
New Revision: 1524000

URL: http://svn.apache.org/r1524000
Log:
Change the way we handle relative URLs so that they work consistently across all versions of SharePoint.

Modified:
    manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
    manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java

Modified: manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java?rev=1524000&r1=1523999&r2=1524000&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java (original)
+++ manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java Tue Sep 17 12:13:39 2013
@@ -589,21 +589,6 @@ public class SPSProxyHelper {
           Object node = nodeDocs.get(j);
           Logging.connectors.debug( node.toString() );
           String relPath = docs.getData( docs.getElement( node, "FileRef" ) );
-
-          // This relative path is apparently from the domain on down; if there's a location offset we therefore
-          // need to get rid of it before checking the document against the site/library tuples.  The recorded
-          // document identifier should also not include it.
-
-          if (!relPath.toLowerCase().startsWith(serverLocation.toLowerCase()))
-          {
-            // Unexpected processing error; the path to the folder or document did not start with the location
-            // offset, so throw up.
-            throw new ManifoldCFException("Internal error: Relative path '"+relPath+"' was expected to start with '"+
-              serverLocation+"'");
-          }
-
-          relPath = relPath.substring(serverLocation.length());
-
           fileStream.addFile( relPath );
         }
       }
@@ -649,9 +634,6 @@ public class SPSProxyHelper {
                 {
                   resultCount++;
                   String relPath = result.getAttribute("FileRef");
-
-                  relPath = "/" + relPath;
-
                   fileStream.addFile( relPath );
                 }
               }

Modified: manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java?rev=1524000&r1=1523999&r2=1524000&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java (original)
+++ manifoldcf/branches/CONNECTORS-772/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java Tue Sep 17 12:13:39 2013
@@ -1406,13 +1406,10 @@ public class SharePointRepository extend
             if (Logging.connectors.isDebugEnabled())
               Logging.connectors.debug( "SharePoint: Document identifier is a library: '" + siteLibPath + "'" );
 
-            // Calculate the start of the path part that would contain the folders/file
-            int foldersFilePathIndex = encodedServerLocation.length() + site.length() + 1 + libName.length();
-
             String libID = proxy.getDocLibID( encodePath(site), site, libName, fullListPaths );
             if (libID != null)
             {
-              FileStream fs = new FileStream( activities, foldersFilePathIndex, spec );
+              FileStream fs = new FileStream( activities, encodedServerLocation, siteLibPath, spec );
               boolean success = proxy.getChildren( fs, encodePath(site) , libID, dspStsWorks );
               if (!success)
               {
@@ -1874,35 +1871,54 @@ public class SharePointRepository extend
 
   protected class FileStream implements IFileStream
   {
-    protected IProcessActivity activities;
-    protected int foldersFilePathIndex;
-    protected DocumentSpecification spec;
+    protected final IProcessActivity activities;
+    protected final DocumentSpecification spec;
+    protected final String rootPath;
+    protected final String siteLibPath;
     
-    public FileStream(IProcessActivity activities, int foldersFilePathIndex, DocumentSpecification spec)
+    public FileStream(IProcessActivity activities, String rootPath, String siteLibPath, DocumentSpecification spec)
     {
       this.activities = activities;
-      this.foldersFilePathIndex = foldersFilePathIndex;
       this.spec = spec;
+      this.rootPath = rootPath;
+      this.siteLibPath = siteLibPath;
     }
     
     public void addFile(String relPath)
       throws ManifoldCFException
     {
-      if ( checkIncludeFile( relPath, spec ) )
+
+      // First, convert the relative path to a full path
+      if ( !relPath.startsWith("/") )
       {
-        // Since the processing for a file needs to know the library path, we need a way to signal the cutoff between library and folder levels.
-        // The way I've chosen to do this is to use a double slash at that point, as a separator.
-        if (relPath.length() >= foldersFilePathIndex)
+        relPath = rootPath + siteLibPath + "/" + relPath;
+      }
+      
+      // Now, strip away what we don't want - namely, the root path.  This makes the path relative to the root.
+      if ( relPath.length() >= rootPath.length() )
+      {
+        relPath = relPath.substring(rootPath.length());
+      
+        if ( checkIncludeFile( relPath, spec ) )
         {
-          String modifiedPath = relPath.substring(0,foldersFilePathIndex) + "/" + relPath.substring(foldersFilePathIndex);
+          // Since the processing for a file needs to know the library path, we need a way to signal the cutoff between library and folder levels.
+          // The way I've chosen to do this is to use a double slash at that point, as a separator.
+          if (relPath.length() >= siteLibPath.length())
+          {
+            String modifiedPath = relPath.substring(0,siteLibPath.length()) + "/" + relPath.substring(siteLibPath.length());
 
-          activities.addDocumentReference( modifiedPath );
-        }
-        else
-        {
-          Logging.connectors.warn("Sharepoint: Unexpected relPath structure; path is '"+relPath+"', but expected <list/library> length of "+foldersFilePathIndex);
+            activities.addDocumentReference( modifiedPath );
+          }
+          else
+          {
+            Logging.connectors.warn("SharePoint: Unexpected relPath structure; path is '"+relPath+"', but expected to see something beginning with "+siteLibPath);
+          }
         }
       }
+      else
+      {
+        Logging.connectors.warn("SharePoint: Unexpected relPath structure; path is "+relPath+", but expected to see something beginning with "+rootPath);
+      }
     }
   }