You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2012/07/16 16:45:21 UTC

svn commit: r1362069 - in /manifoldcf/trunk: ./ connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/ connectors/sharepoint/connector/src/test/java/org/ framework/core/src/main/java/org/apache/manifoldcf/cor...

Author: kwright
Date: Mon Jul 16 14:45:20 2012
New Revision: 1362069

URL: http://svn.apache.org/viewvc?rev=1362069&view=rev
Log:
Part of the fix for CONNECTORS-470.  Update SharePoint connector to work with SharePoint 2010.

Added:
    manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/IFileStream.java
      - copied unchanged from r1349740, incubator/lcf/branches/CONNECTORS-470/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/IFileStream.java
    manifoldcf/trunk/connectors/sharepoint/connector/src/test/java/org/
      - copied from r1360750, manifoldcf/branches/CONNECTORS-470/connectors/sharepoint/connector/src/test/java/org/
Modified:
    manifoldcf/trunk/   (props changed)
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
    manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
    manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/common/XMLDoc.java

Propchange: manifoldcf/trunk/
------------------------------------------------------------------------------
  Merged /incubator/lcf/branches/CONNECTORS-470:r1337828-1349740
  Merged /manifoldcf/branches/CONNECTORS-470:r1349741-1360750,1360808

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1362069&r1=1362068&r2=1362069&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Mon Jul 16 14:45:20 2012
@@ -3,6 +3,10 @@ $Id$
 
 ======================= 0.7-dev =====================
 
+CONNECTORS-470: Extend SharePoint connector to work with
+SharePoint 2010.
+(Ahmet Arslan, Fatih Samet Cetin, Joe Becknell, Karl Wright)
+
 ======================= Release 0.6 =====================
 
 CONNECTORS-489: Fix user-agent string in web and rss connectors

Modified: manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java?rev=1362069&r1=1362068&r2=1362069&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java (original)
+++ manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java Mon Jul 16 14:45:20 2012
@@ -23,6 +23,7 @@ import java.util.Map;
 import java.util.Hashtable;
 import java.util.Iterator;
 import java.util.List;
+import java.util.regex.*;
 
 import java.io.InputStream;
 
@@ -43,6 +44,7 @@ import org.apache.axis.EngineConfigurati
 
 import javax.xml.namespace.QName;
 
+import org.apache.axis.message.MessageElement;
 import org.apache.axis.AxisEngine;
 import org.apache.axis.ConfigurationException;
 import org.apache.axis.Handler;
@@ -505,67 +507,205 @@ public class SPSProxyHelper {
   * @throws ManifoldCFException
   * @throws ServiceInterruption
   */
-  public XMLDoc getDocuments(String site, String docLibrary)
+  public boolean getDocuments(IFileStream fileStream, String site, String docLibrary, boolean dspStsWorks )
     throws ManifoldCFException, ServiceInterruption
   {
     long currentTime;
     try
     {
       if ( site.equals("/") ) site = ""; // root case
+      if ( dspStsWorks )
+      {
         StsAdapterWS listService = new StsAdapterWS( baseUrl + site, userName, password, myFactory, configuration, connectionManager );
-      StsAdapterSoapStub stub = (StsAdapterSoapStub)listService.getStsAdapterSoapHandler();
+        StsAdapterSoapStub stub = (StsAdapterSoapStub)listService.getStsAdapterSoapHandler();
 
-      String[] vArray = new String[1];
-      vArray[0] = "1.0";
-      VersionsHeader myVersion = new VersionsHeader();
-      myVersion.setVersion( vArray );
+        String[] vArray = new String[1];
+        vArray[0] = "1.0";
+        VersionsHeader myVersion = new VersionsHeader();
+        myVersion.setVersion( vArray );
 
-      stub.setHeader( "http://schemas.microsoft.com/sharepoint/dsp", "versions", myVersion );
+        stub.setHeader( "http://schemas.microsoft.com/sharepoint/dsp", "versions", myVersion );
 
-      RequestHeader reqHeader = new RequestHeader();
-      reqHeader.setDocument( DocumentType.content );
-      reqHeader.setMethod(MethodType.query );
+        RequestHeader reqHeader = new RequestHeader();
+        reqHeader.setDocument( DocumentType.content );
+        reqHeader.setMethod(MethodType.query );
 
-      stub.setHeader( "http://schemas.microsoft.com/sharepoint/dsp", "request", reqHeader );
+        stub.setHeader( "http://schemas.microsoft.com/sharepoint/dsp", "request", reqHeader );
 
-      QueryRequest myRequest = new QueryRequest();
+        QueryRequest myRequest = new QueryRequest();
 
-      DSQuery sQuery = new DSQuery();
-      sQuery.setSelect( "/list[@id='" + docLibrary + "']" );
-      myRequest.setDsQuery( sQuery );
+        DSQuery sQuery = new DSQuery();
+        sQuery.setSelect( "/list[@id='" + docLibrary + "']" );
+        myRequest.setDsQuery( sQuery );
 
-      StsAdapterSoap call = stub;
-      ArrayList nodeList = new ArrayList();
+        StsAdapterSoap call = stub;
+        ArrayList nodeList = new ArrayList();
 
-      QueryResponse resp = call.query( myRequest );
-      org.apache.axis.message.MessageElement[] list = resp.get_any();
-      if (Logging.connectors.isInfoEnabled())
-      {
-        Logging.connectors.info("SharePoint: list xml: '" + list[0].toString() + "'");
-      }
+        QueryResponse resp = call.query( myRequest );
+        org.apache.axis.message.MessageElement[] list = resp.get_any();
+        if (Logging.connectors.isDebugEnabled())
+        {
+          Logging.connectors.debug("SharePoint: list xml: '" + list[0].toString() + "'");
+        }
 
-      XMLDoc doc = new XMLDoc( list[0].toString() );
+        XMLDoc doc = new XMLDoc( list[0].toString() );
 
-      doc.processPath(nodeList, "*", null);
-      if (nodeList.size() != 1)
-      {
-        throw new ManifoldCFException("Bad xml - missing outer 'ns1:dsQueryResponse' node - there are "+Integer.toString(nodeList.size())+" nodes");
-      }
+        doc.processPath(nodeList, "*", null);
+        if (nodeList.size() != 1)
+        {
+          throw new ManifoldCFException("Bad xml - missing outer 'ns1:dsQueryResponse' node - there are "+Integer.toString(nodeList.size())+" nodes");
+        }
 
-      Object parent = nodeList.get(0);
-      //System.out.println( "Outer NodeName = " + doc.getNodeName(parent) );
-      if (!doc.getNodeName(parent).equals("ns1:dsQueryResponse"))
-        throw new ManifoldCFException("Bad xml - outer node is not 'ns1:dsQueryResponse'");
+        Object parent = nodeList.get(0);
+        //System.out.println( "Outer NodeName = " + doc.getNodeName(parent) );
+        if (!doc.getNodeName(parent).equals("ns1:dsQueryResponse"))
+          throw new ManifoldCFException("Bad xml - outer node is not 'ns1:dsQueryResponse'");
 
-      nodeList.clear();
-      doc.processPath(nodeList, "*", parent);
+        nodeList.clear();
+        doc.processPath(nodeList, "*", parent);
 
-      if ( nodeList.size() != 2 )
-      {
-        throw new ManifoldCFException( " No results found." );
+        if ( nodeList.size() != 2 )
+        {
+          throw new ManifoldCFException( " No results found." );
+        }
+
+        // Now, extract the files from the response document
+        XMLDoc docs = doc;
+        ArrayList nodeDocs = new ArrayList();
+
+        docs.processPath( nodeDocs, "*", null );
+        parent = nodeDocs.get(0);                // ns1:dsQueryResponse
+        nodeDocs.clear();
+        docs.processPath(nodeDocs, "*", parent);
+        Object documents = nodeDocs.get(1);
+        nodeDocs.clear();
+        docs.processPath(nodeDocs, "*", documents);
+
+        StringBuilder sb = new StringBuilder();
+        for( int j =0; j < nodeDocs.size(); j++)
+        {
+          Object node = nodeDocs.get(j);
+          Logging.connectors.debug( node.toString() );
+          String relPath = docs.getData( docs.getElement( node, "FileRef" ) );
+
+          // This relative path is apparently from the domain on down; if there's a location offset we therefore
+          // need to get rid of it before checking the document against the site/library tuples.  The recorded
+          // document identifier should also not include it.
+
+          if (!relPath.toLowerCase().startsWith(serverLocation.toLowerCase()))
+          {
+            // Unexpected processing error; the path to the folder or document did not start with the location
+            // offset, so throw up.
+            throw new ManifoldCFException("Internal error: Relative path '"+relPath+"' was expected to start with '"+
+              serverLocation+"'");
+          }
+
+          relPath = relPath.substring(serverLocation.length());
+
+          if ( !relPath.endsWith(".aspx") )
+            fileStream.addFile( relPath );
+        }
       }
+      else
+      {
+        // Sharepoint 2010; use Lists service instead
+        ListsWS lservice = new ListsWS(baseUrl + site, userName, password, myFactory, configuration, connectionManager );
+        ListsSoapStub stub1 = (ListsSoapStub)lservice.getListsSoapHandler();
 
-      return doc;
+        // This string is the paging chunk description.  It gets updated on every chunk we do,
+        // so that the next call finds new data.
+        String nextChunkDescription = "";
+        // Order by some column we know is indexed.
+        GetListItemsQuery orderByQuery = buildOrderedQuery("ID");
+        // Set up fields we want
+        ArrayList fieldList = new ArrayList();
+        fieldList.add("FileRef");
+        GetListItemsViewFields viewFields = buildViewFields(fieldList);
+        // Pick a request size we know will not exceed the limit as set by the administrator.
+        int requestSize = 2000;
+          
+        while (true)
+        {
+          GetListItemsResponseGetListItemsResult items =  stub1.getListItems(docLibrary, "", orderByQuery, viewFields, Integer.toString(requestSize), buildPagingQueryOptions(nextChunkDescription), null);
+          if (items == null)
+            return false;
+
+          org.apache.axis.message.MessageElement[] list = items.get_any();
+
+          if (Logging.connectors.isDebugEnabled()){
+            Logging.connectors.debug("SharePoint: getListItems xml response: '" + list[0].toString() + "'");
+          }
+
+          ArrayList nodeList = new ArrayList();
+          XMLDoc doc = new XMLDoc(list[0].toString());
+
+          doc.processPath(nodeList, "*", null);
+          if (nodeList.size() != 1) {
+            throw new ManifoldCFException("Bad xml - expecting one outer 'ns1:listitems' node - there are " + Integer.toString(nodeList.size()) + " nodes");
+          }
+
+          Object parent = nodeList.get(0);
+          if (!"ns1:listitems".equals(doc.getNodeName(parent)))
+            throw new ManifoldCFException("Bad xml - outer node is not 'ns1:listitems'");
+
+
+          nodeList.clear();
+          doc.processPath(nodeList, "*", parent);
+
+          if (nodeList.size() != 1)
+            throw new ManifoldCFException("Expected rsdata result but no results found.");
+
+          Object rsData = nodeList.get(0);
+
+          // Get the chunk description
+          nextChunkDescription = doc.getValue(rsData, "ListItemCollectionPositionNext");
+
+          int itemCount = Integer.parseInt(doc.getValue(rsData, "ItemCount"));
+
+          // Now, extract the files from the response document
+          XMLDoc docs = doc;
+          ArrayList nodeDocs = new ArrayList();
+
+          docs.processPath(nodeDocs, "*", rsData);
+
+          if (nodeDocs.size() != itemCount) {
+            throw new ManifoldCFException("itemCount does not match with nodeDocs.size().");
+          }
+
+          for (int j = 0; j < nodeDocs.size(); j++)
+          {
+
+            Object node = nodeDocs.get(j);
+
+            String relPath = doc.getValue(node, "ows_FileRef");
+
+            // This relative path is apparently from the domain on down; if there's a location offset we therefore
+            // need to get rid of it before checking the document against the site/library tuples.  The recorded
+            // document identifier should also not include it.
+
+            // KDW: Removed the case changes; URL characters should remain case-sensitive
+            if (!relPath.startsWith(serverLocation))
+            {
+              // Unexpected processing error; the path to the folder or document did not start with the location
+              // offset, so throw up.
+              throw new ManifoldCFException("Internal error: Relative path '"+relPath+"' was expected to start with '"+
+                serverLocation+"'");
+            }
+            relPath = relPath.substring(serverLocation.length());
+
+            relPath = "/" + valueMunge(relPath);
+
+            if (!relPath.endsWith(".aspx")) {
+              fileStream.addFile( relPath );
+            }
+          }
+          
+          if (requestSize > nodeDocs.size())
+            break;
+        }
+      }
+      
+      return true;
     }
     catch (java.net.MalformedURLException e)
     {
@@ -594,14 +734,14 @@ public class SPSProxyHelper {
             // Page did not exist
             if (Logging.connectors.isDebugEnabled())
               Logging.connectors.debug("SharePoint: The page at "+baseUrl+site+" did not exist; assuming library deleted");
-            return null;
+            return false;
           }
           else if (httpErrorCode.equals("401"))
           {
             // User did not have permissions for this library to get the acls
             if (Logging.connectors.isDebugEnabled())
               Logging.connectors.debug("SharePoint: The crawl user did not have access to list documents for "+baseUrl+site+"; skipping documents within");
-            return null;
+            return false;
           }
           else if (httpErrorCode.equals("403"))
             throw new ManifoldCFException("Http error "+httpErrorCode+" while reading from "+baseUrl+site+" - check IIS and SharePoint security settings! "+e.getMessage(),e);
@@ -622,7 +762,7 @@ public class SPSProxyHelper {
             // List did not exist
             if (Logging.connectors.isDebugEnabled())
               Logging.connectors.debug("SharePoint: The list "+docLibrary+" in site "+site+" did not exist; assuming library deleted");
-            return null;
+            return false;
           }
           else
           {
@@ -635,7 +775,7 @@ public class SPSProxyHelper {
 
               Logging.connectors.debug("SharePoint: Getting child documents for the list "+docLibrary+" in site "+site+" failed with unexpected SharePoint error code "+sharepointErrorCode+": "+errorString+" - Skipping",e);
             }
-            return null;
+            return false;
           }
         }
         if (Logging.connectors.isDebugEnabled())
@@ -1036,7 +1176,7 @@ public class SPSProxyHelper {
   /**
   *
   * @param userCall
-  * @param roleName
+  * @param groupName
   * @return
   * @throws Exception
   */
@@ -1377,7 +1517,7 @@ public class SPSProxyHelper {
   * @param docId
   * @return set of the field values
   */
-  public Map getFieldValues( ArrayList fieldNames, String site, String docLibrary, String docId )
+  public Map getFieldValues( ArrayList fieldNames, String site, String docLibrary, String docId, boolean dspStsWorks )
     throws ManifoldCFException, ServiceInterruption
   {
     long currentTime;
@@ -1386,120 +1526,190 @@ public class SPSProxyHelper {
       HashMap result = new HashMap();
 
       if ( site.compareTo("/") == 0 ) site = ""; // root case
+
+      if ( dspStsWorks )
+      {
         StsAdapterWS listService = new StsAdapterWS( baseUrl + site, userName, password, myFactory, configuration, connectionManager );
-      StsAdapterSoapStub stub = (StsAdapterSoapStub)listService.getStsAdapterSoapHandler();
+        StsAdapterSoapStub stub = (StsAdapterSoapStub)listService.getStsAdapterSoapHandler();
 
-      String[] vArray = new String[1];
-      vArray[0] = "1.0";
-      VersionsHeader myVersion = new VersionsHeader();
-      myVersion.setVersion( vArray );
+        String[] vArray = new String[1];
+        vArray[0] = "1.0";
+        VersionsHeader myVersion = new VersionsHeader();
+        myVersion.setVersion( vArray );
 
-      stub.setHeader( "http://schemas.microsoft.com/sharepoint/dsp", "versions", myVersion );
+        stub.setHeader( "http://schemas.microsoft.com/sharepoint/dsp", "versions", myVersion );
 
-      RequestHeader reqHeader = new RequestHeader();
-      reqHeader.setDocument( DocumentType.content );
-      reqHeader.setMethod(MethodType.query );
+        RequestHeader reqHeader = new RequestHeader();
+        reqHeader.setDocument( DocumentType.content );
+        reqHeader.setMethod(MethodType.query );
 
-      stub.setHeader( "http://schemas.microsoft.com/sharepoint/dsp", "request", reqHeader );
+        stub.setHeader( "http://schemas.microsoft.com/sharepoint/dsp", "request", reqHeader );
 
-      QueryRequest myRequest = new QueryRequest();
+        QueryRequest myRequest = new QueryRequest();
 
-      DSQuery sQuery = new DSQuery();
-      sQuery.setSelect( "/list[@id='" + docLibrary + "']" );
-      sQuery.setResultContent(ResultContentType.dataOnly);
-      myRequest.setDsQuery( sQuery );
+        DSQuery sQuery = new DSQuery();
+        sQuery.setSelect( "/list[@id='" + docLibrary + "']" );
+        sQuery.setResultContent(ResultContentType.dataOnly);
+        myRequest.setDsQuery( sQuery );
 
-      DspQuery spQuery = new DspQuery();
-      spQuery.setRowLimit( 1 );
-      // For the Requested Fields
-      if ( fieldNames.size() > 0 )
-      {
-        Fields spFields = new Fields();
-        Field[] fieldArray = new Field[0];
-        ArrayList fields = new ArrayList();
+        DspQuery spQuery = new DspQuery();
+        spQuery.setRowLimit( 1 );
+        // For the Requested Fields
+        if ( fieldNames.size() > 0 )
+        {
+          Fields spFields = new Fields();
+          Field[] fieldArray = new Field[0];
+          ArrayList fields = new ArrayList();
 
-        Field spField = new Field();
-        //                      spField.setName( "ID" );
-        //                      spField.setAlias( "ID" );
-        //                      fields.add( spField );
+          Field spField = new Field();
+          //                      spField.setName( "ID" );
+          //                      spField.setAlias( "ID" );
+          //                      fields.add( spField );
 
-        for ( int k = 0; k < fieldNames.size(); k++ )
+          for ( int k = 0; k < fieldNames.size(); k++ )
+          {
+            spField = new Field();
+            spField.setName( (String)fieldNames.get(k) );
+            spField.setAlias( (String)fieldNames.get(k) );
+            fields.add( spField );
+          }
+          spFields.setField( (Field[]) fields.toArray( fieldArray ));
+          spQuery.setFields( spFields );
+        }
+        // Of this document
+        DspQueryWhere spWhere = new DspQueryWhere();
+
+        org.apache.axis.message.MessageElement criterion = new org.apache.axis.message.MessageElement( (String)null, "Contains" );
+        SOAPElement seFieldRef = criterion.addChildElement( "FieldRef" );
+        seFieldRef.addAttribute( SOAPFactory.newInstance().createName("Name") , "FileRef" );
+        SOAPElement seValue = criterion.addChildElement( "Value" );
+        seValue.addAttribute( SOAPFactory.newInstance().createName("Type") , "String" );
+        seValue.setValue( docId );
+
+        org.apache.axis.message.MessageElement[] criteria = { criterion };
+        spWhere.set_any( criteria );
+        spQuery.setWhere( (DspQueryWhere)spWhere );
+
+        // Set Criteria
+        myRequest.getDsQuery().setQuery(spQuery);
+
+        StsAdapterSoap call = stub;
+
+        // Make Request
+        QueryResponse resp = call.query( myRequest );
+        org.apache.axis.message.MessageElement[] list = resp.get_any();
+
+        if (Logging.connectors.isDebugEnabled())
         {
-          spField = new Field();
-          spField.setName( (String)fieldNames.get(k) );
-          spField.setAlias( (String)fieldNames.get(k) );
-          fields.add( spField );
+          Logging.connectors.debug("SharePoint: list xml: '" + list[0].toString() + "'");
         }
-        spFields.setField( (Field[]) fields.toArray( fieldArray ));
-        spQuery.setFields( spFields );
-      }
-      // Of this document
-      DspQueryWhere spWhere = new DspQueryWhere();
 
-      org.apache.axis.message.MessageElement criterion = new org.apache.axis.message.MessageElement( (String)null, "Contains" );
-      SOAPElement seFieldRef = criterion.addChildElement( "FieldRef" );
-      seFieldRef.addAttribute( SOAPFactory.newInstance().createName("Name") , "FileRef" );
-      SOAPElement seValue = criterion.addChildElement( "Value" );
-      seValue.addAttribute( SOAPFactory.newInstance().createName("Type") , "String" );
-      seValue.setValue( docId );
+        XMLDoc doc = new XMLDoc( list[0].toString() );
+        ArrayList nodeList = new ArrayList();
 
-      org.apache.axis.message.MessageElement[] criteria = { criterion };
-      spWhere.set_any( criteria );
-      spQuery.setWhere( (DspQueryWhere)spWhere );
+        doc.processPath(nodeList, "*", null);
+        if (nodeList.size() != 1)
+        {
+          throw new ManifoldCFException("Bad xml - missing outer 'ns1:dsQueryResponse' node - there are "+Integer.toString(nodeList.size())+" nodes");
+        }
 
-      // Set Criteria
-      myRequest.getDsQuery().setQuery(spQuery);
+        Object parent = nodeList.get(0);
+        //System.out.println( "Outer NodeName = " + doc.getNodeName(parent) );
+        if (!doc.getNodeName(parent).equals("ns1:dsQueryResponse"))
+          throw new ManifoldCFException("Bad xml - outer node is not 'ns1:dsQueryResponse'");
 
-      StsAdapterSoap call = stub;
+        nodeList.clear();
+        doc.processPath(nodeList, "*", parent);
 
-      // Make Request
-      QueryResponse resp = call.query( myRequest );
-      org.apache.axis.message.MessageElement[] list = resp.get_any();
+        parent = nodeList.get( 0 ); // <Shared_X0020_Documents />
 
-      if (Logging.connectors.isDebugEnabled())
-      {
-        Logging.connectors.debug("SharePoint: list xml: '" + list[0].toString() + "'");
-      }
+        nodeList.clear();
+        doc.processPath(nodeList, "*", parent);
 
-      XMLDoc doc = new XMLDoc( list[0].toString() );
-      ArrayList nodeList = new ArrayList();
+        // Process each result (Should only be one )
+        // Get each childs Value and add to return array
+        for ( int i= 0; i < nodeList.size(); i++ )
+        {
+          Object documentNode = nodeList.get( i );
+          ArrayList fieldList = new ArrayList();
 
-      doc.processPath(nodeList, "*", null);
-      if (nodeList.size() != 1)
-      {
-        throw new ManifoldCFException("Bad xml - missing outer 'ns1:dsQueryResponse' node - there are "+Integer.toString(nodeList.size())+" nodes");
+          doc.processPath( fieldList, "*", documentNode );
+          for ( int j =0; j < fieldList.size(); j++)
+          {
+            Object field = fieldList.get( j );
+            String fieldData = doc.getData(field);
+            String fieldName = doc.getNodeName(field);
+            // Right now this really only works right for single-valued fields.  For multi-valued
+            // fields, we'd need to know in advance that they were multivalued
+            // so that we could interpret commas as value separators.
+            result.put(fieldName,fieldData);
+          }
+        }
       }
+      else
+      {
+        // SharePoint 2010: Get field values some other way
+        // Sharepoint 2010; use Lists service instead
+        ListsWS lservice = new ListsWS(baseUrl + site, userName, password, myFactory, configuration, connectionManager );
+        ListsSoapStub stub1 = (ListsSoapStub)lservice.getListsSoapHandler();
+        
+        GetListItemsQuery q = buildMatchQuery("FileRef","Text",docId);
+        GetListItemsViewFields viewFields = buildViewFields(fieldNames);
 
-      Object parent = nodeList.get(0);
-      //System.out.println( "Outer NodeName = " + doc.getNodeName(parent) );
-      if (!doc.getNodeName(parent).equals("ns1:dsQueryResponse"))
-        throw new ManifoldCFException("Bad xml - outer node is not 'ns1:dsQueryResponse'");
+        GetListItemsResponseGetListItemsResult items =  stub1.getListItems(docLibrary, "", q, viewFields, "1", null, null);
+        if (items == null)
+          return result;
 
-      nodeList.clear();
-      doc.processPath(nodeList, "*", parent);
+        MessageElement[] list = items.get_any();
 
-      parent = nodeList.get( 0 ); // <Shared_X0020_Documents />
+        if (Logging.connectors.isDebugEnabled()){
+          Logging.connectors.debug("SharePoint: getListItems for '"+docId+"' xml response: '" + list[0].toString() + "'");
+        }
 
-      nodeList.clear();
-      doc.processPath(nodeList, "*", parent);
+        ArrayList nodeList = new ArrayList();
+        XMLDoc doc = new XMLDoc(list[0].toString());
 
-      // Process each result (Should only be one )
-      // Get each childs Value and add to return array
-      for ( int i= 0; i < nodeList.size(); i++ )
-      {
-        Object documentNode = nodeList.get( i );
-        ArrayList fieldList = new ArrayList();
+        doc.processPath(nodeList, "*", null);
+        if (nodeList.size() != 1)
+          throw new ManifoldCFException("Bad xml - expecting one outer 'ns1:listitems' node - there are " + Integer.toString(nodeList.size()) + " nodes");
 
-        doc.processPath( fieldList, "*", documentNode );
-        for ( int j =0; j < fieldList.size(); j++)
+        Object parent = nodeList.get(0);
+        if (!"ns1:listitems".equals(doc.getNodeName(parent)))
+          throw new ManifoldCFException("Bad xml - outer node is not 'ns1:listitems'");
+
+        nodeList.clear();
+        doc.processPath(nodeList, "*", parent);
+
+        if (nodeList.size() != 1)
+          throw new ManifoldCFException("Expected rsdata result but no results found.");
+
+        Object rsData = nodeList.get(0);
+
+        int itemCount = Integer.parseInt(doc.getValue(rsData, "ItemCount"));
+        if (itemCount == 0)
+          return result;
+          
+        // Now, extract the files from the response document
+        ArrayList nodeDocs = new ArrayList();
+
+        doc.processPath(nodeDocs, "*", rsData);
+
+        if (nodeDocs.size() != itemCount)
+          throw new ManifoldCFException("itemCount does not match with nodeDocs.size()");
+
+        if (itemCount != 1)
+          throw new ManifoldCFException("Expecting only one item, instead saw '"+itemCount+"'");
+        
+        Object o = nodeDocs.get(0);
+        
+        // Look for all the specified attributes in the record
+        for (Object attrName : fieldNames)
         {
-          Object field = fieldList.get( j );
-          String fieldData = doc.getData(field);
-          String fieldName = doc.getNodeName(field);
-          // Right now this really only works right for single-valued fields.  For multi-valued
-          // fields, we'd need to know in advance that they were multivalued
-          // so that we could interpret commas as value separators.
-          result.put(fieldName,fieldData);
+          String attrValue = doc.getValue(o,"ows_"+(String)attrName);
+          if (attrValue != null)
+          {
+            result.put(attrName,valueMunge(attrValue));
+          }
         }
       }
 
@@ -1836,6 +2046,130 @@ public class SPSProxyHelper {
     }
   }
 
+  // Regexp pattern to match 12345;#
+  protected static Pattern subsPattern;
+  static
+  {
+    try
+    {
+      subsPattern = Pattern.compile("[0-9]*;#.*");
+    }
+    catch (Exception e)
+    {
+      e.printStackTrace();
+      System.exit(-100);
+    }
+  }
+  
+  /** Substitute progid where found */
+  protected static String valueMunge(String value)
+  {
+    Matcher matcher = subsPattern.matcher(value);
+    if (matcher.matches())
+      return value.substring(value.indexOf("#") + 1);
+    return value;
+  }
+  
+  /** Build viewFields XML for the ListItems call.
+  */
+  protected static GetListItemsViewFields buildViewFields(ArrayList fieldNames)
+    throws ManifoldCFException
+  {
+    try
+    {
+      GetListItemsViewFields rval = new GetListItemsViewFields();
+      MessageElement viewFieldsNode = new MessageElement((String)null,"ViewFields");
+      rval.set_any(new MessageElement[]{viewFieldsNode});
+      for (Object x : fieldNames)
+      {
+        MessageElement child = new MessageElement((String)null,"FieldRef");
+        viewFieldsNode.addChild(child);
+        child.addAttribute(null,"Name",(String)x);
+      }
+      return rval;
+    }
+    catch (javax.xml.soap.SOAPException e)
+    {
+      throw new ManifoldCFException(e.getMessage(),e);
+    }
+  }
+  
+  /** Build a query XML object that matches a specified field and value pair.
+  */
+  protected static GetListItemsQuery buildMatchQuery(String fieldName, String type, String value)
+    throws ManifoldCFException
+  {
+    try
+    {
+      GetListItemsQuery rval = new GetListItemsQuery();
+      MessageElement queryNode = new MessageElement((String)null,"Query");
+      rval.set_any(new MessageElement[]{queryNode});
+      MessageElement whereNode = new MessageElement((String)null,"Where");
+      queryNode.addChild(whereNode);
+      MessageElement eqNode = new MessageElement((String)null,"Eq");
+      whereNode.addChild(eqNode);
+      MessageElement fieldRefNode = new MessageElement((String)null,"FieldRef");
+      eqNode.addChild(fieldRefNode);
+      fieldRefNode.addAttribute(null,"Name",fieldName);
+      MessageElement valueNode = new MessageElement((String)null,"Value");
+      eqNode.addChild(valueNode);
+      valueNode.addAttribute(null,"Type",type);
+      valueNode.addTextNode(value);
+      return rval;
+    }
+    catch (javax.xml.soap.SOAPException e)
+    {
+      throw new ManifoldCFException(e.getMessage(),e);
+    }
+  }
+  
+  /** Build a query XML object that orders by an indexed column, for paging.
+  */
+  protected static GetListItemsQuery buildOrderedQuery(String indexedColumn)
+    throws ManifoldCFException
+  {
+    try
+    {
+      GetListItemsQuery rval = new GetListItemsQuery();
+      MessageElement queryNode = new MessageElement((String)null,"Query");
+      rval.set_any(new MessageElement[]{queryNode});
+      MessageElement orderByNode = new MessageElement((String)null,"OrderBy");
+      queryNode.addChild(orderByNode);
+      orderByNode.addAttribute(null,"Override","TRUE");
+      orderByNode.addAttribute(null,"UseIndexForOrderBy","TRUE");
+      MessageElement fieldRefNode = new MessageElement((String)null,"FieldRef");
+      orderByNode.addChild(fieldRefNode);
+      fieldRefNode.addAttribute(null,"Ascending","TRUE");
+      fieldRefNode.addAttribute(null,"Name",indexedColumn);
+      return rval;
+    }
+    catch (javax.xml.soap.SOAPException e)
+    {
+      throw new ManifoldCFException(e.getMessage(),e);
+    }
+  }
+  
+  /** Build queryOptions XML object that specifies a paging value.
+  */
+  protected static GetListItemsQueryOptions buildPagingQueryOptions(String pageNextString)
+    throws ManifoldCFException
+  {
+    try
+    {
+      GetListItemsQueryOptions rval = new GetListItemsQueryOptions();
+      MessageElement queryOptionsNode = new MessageElement((String)null,"QueryOptions");
+      rval.set_any(new MessageElement[]{queryOptionsNode});
+      MessageElement pagingNode = new MessageElement((String)null,"Paging");
+      queryOptionsNode.addChild(pagingNode);
+      pagingNode.addAttribute(null,"ListItemCollectionPositionNext",pageNextString);
+      return rval;
+    }
+    catch (javax.xml.soap.SOAPException e)
+    {
+      throw new ManifoldCFException(e.getMessage(),e);
+    }
+  }
+  
   /**
   * SharePoint Permissions Service Wrapper Class
   */
@@ -2136,7 +2470,7 @@ public class SPSProxyHelper {
     /**
      * Constructor setting the resource name.
      */
-    public ResourceProvider(Class resourceClass, String resourceName) 
+    public ResourceProvider(Class resourceClass, String resourceName)
     {
       this.resourceClass = resourceClass;
       this.resourceName = resourceName;
@@ -2213,7 +2547,7 @@ public class SPSProxyHelper {
 
     /**
      * Get a service which has been mapped to a particular namespace
-     * 
+     *
      * @param namespace a namespace URI
      * @return an instance of the appropriate Service, or null
      */
@@ -2262,7 +2596,7 @@ public class SPSProxyHelper {
     public Hashtable getGlobalOptions() throws ConfigurationException
     {
       WSDDGlobalConfiguration globalConfig = deployment.getGlobalConfiguration();
-            
+
       if (globalConfig != null)
         return globalConfig.getParametersTable();
 
@@ -2288,5 +2622,5 @@ public class SPSProxyHelper {
       return deployment.getRoles();
     }
   }
-  
+
 }

Modified: manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java?rev=1362069&r1=1362068&r2=1362069&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java (original)
+++ manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java Mon Jul 16 14:45:20 2012
@@ -61,6 +61,7 @@ public class SharePointRepository extend
   public final static String ACTIVITY_FETCH = "fetch";
 
   private boolean supportsItemSecurity = false;
+  private boolean dspStsWorks = true;
   private String serverProtocol = null;
   private String serverUrl = null;
   private String fileBaseUrl = null;
@@ -117,7 +118,8 @@ public class SharePointRepository extend
       String serverVersion = params.getParameter( "serverVersion" );
       if (serverVersion == null)
         serverVersion = "2.0";
-      supportsItemSecurity = serverVersion.equals("3.0");
+      supportsItemSecurity = !serverVersion.equals("2.0");
+      dspStsWorks = !serverVersion.equals("4.0");
 
       serverProtocol = params.getParameter( "serverProtocol" );
       if (serverProtocol == null)
@@ -669,7 +671,7 @@ public class SharePointRepository extend
                 // The document path includes the library, with no leading slash, and is decoded.
                 int cutoff = decodedLibPath.lastIndexOf("/");
                 String decodedDocumentPathWithoutSite = decodedDocumentPath.substring(cutoff+1);
-                Map values = proxy.getFieldValues( metadataDescription, encodedSitePath, libID, decodedDocumentPathWithoutSite);
+                Map values = proxy.getFieldValues( metadataDescription, encodedSitePath, libID, decodedDocumentPathWithoutSite, dspStsWorks );
                 String modifyDate = (String)values.get("Last_x0020_Modified");
                 if (modifyDate != null)
                 {
@@ -903,49 +905,9 @@ public class SharePointRepository extend
           String libID = proxy.getDocLibID( encodePath(site), site, libName );
           if (libID != null)
           {
-            XMLDoc docs = proxy.getDocuments( encodePath(site) , libID );
-            if (docs != null)
-            {
-              ArrayList nodeDocs = new ArrayList();
-
-              docs.processPath( nodeDocs, "*", null );
-              Object parent = nodeDocs.get(0);                // ns1:dsQueryResponse
-              nodeDocs.clear();
-              docs.processPath(nodeDocs, "*", parent);
-              Object documents = nodeDocs.get(1);
-              nodeDocs.clear();
-              docs.processPath(nodeDocs, "*", documents);
-
-              StringBuilder sb = new StringBuilder();
-              for( int j =0; j < nodeDocs.size(); j++)
-              {
-                Object node = nodeDocs.get(j);
-                Logging.connectors.debug( node.toString() );
-                String relPath = docs.getData( docs.getElement( node, "FileRef" ) );
-
-                // This relative path is apparently from the domain on down; if there's a location offset we therefore
-                // need to get rid of it before checking the document against the site/library tuples.  The recorded
-                // document identifier should also not include it.
-
-                if (!relPath.toLowerCase().startsWith(serverLocation.toLowerCase()))
-                {
-                  // Unexpected processing error; the path to the folder or document did not start with the location
-                  // offset, so throw up.
-                  throw new ManifoldCFException("Internal error: Relative path '"+relPath+"' was expected to start with '"+
-                    serverLocation+"'");
-                }
-
-                relPath = relPath.substring(serverLocation.length());
-
-                // Since the processing for a file needs to know the library path, we need a way to signal the cutoff between library and folder levels.
-                // The way I've chosen to do this is to use a double slash at that point, as a separator.
-                String modifiedPath = relPath.substring(0,foldersFilePathIndex) + "/" + relPath.substring(foldersFilePathIndex);
-
-                if ( !relPath.endsWith(".aspx") && checkInclude( relPath, spec ) )
-                  activities.addDocumentReference( modifiedPath );
-              }
-            }
-            else
+            FileStream fs = new FileStream( activities, foldersFilePathIndex, spec );
+            boolean success = proxy.getDocuments( fs, encodePath(site) , libID, dspStsWorks );
+            if (!success)
             {
               // Site/library no longer exists, so delete entry
               if (Logging.connectors.isDebugEnabled())
@@ -1261,7 +1223,7 @@ public class SharePointRepository extend
                       }
 
                       int cutoff = decodedLibPath.lastIndexOf("/");
-                      Map values = proxy.getFieldValues( metadataDescription, encodePath(site), documentLibID, decodedDocumentPath.substring(cutoff+1) );
+                      Map values = proxy.getFieldValues( metadataDescription, encodePath(site), documentLibID, decodedDocumentPath.substring(cutoff+1), dspStsWorks );
                       if (values != null)
                       {
                         Iterator iter = values.keySet().iterator();
@@ -1347,6 +1309,34 @@ public class SharePointRepository extend
     }
   }
 
+  protected class FileStream implements IFileStream
+  {
+    protected IProcessActivity activities;
+    protected int foldersFilePathIndex;
+    protected DocumentSpecification spec;
+    
+    public FileStream(IProcessActivity activities, int foldersFilePathIndex, DocumentSpecification spec)
+    {
+      this.activities = activities;
+      this.foldersFilePathIndex = foldersFilePathIndex;
+      this.spec = spec;
+    }
+    
+    public void addFile(String relPath)
+      throws ManifoldCFException
+    {
+      if ( checkInclude( relPath, spec ) )
+      {
+        // Since the processing for a file needs to know the library path, we need a way to signal the cutoff between library and folder levels.
+        // The way I've chosen to do this is to use a double slash at that point, as a separator.
+        String modifiedPath = relPath.substring(0,foldersFilePathIndex) + "/" + relPath.substring(foldersFilePathIndex);
+
+        activities.addDocumentReference( modifiedPath );
+      }
+    }
+  }
+  
+
   // UI support methods.
   //
   // These support methods come in two varieties.  The first bunch is involved in setting up connection configuration information.  The second bunch
@@ -1548,8 +1538,9 @@ public class SharePointRepository extend
 "    <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"SharePointRepository.ServerSharePointVersion") + "</nobr></td>\n"+
 "    <td class=\"value\">\n"+
 "      <select name=\"serverVersion\">\n"+
-"        <option value=\"2.0\" "+((serverVersion.equals("2.0"))?"selected=\"true\"":"")+">SharePoint Services 2.0</option>\n"+
-"        <option value=\"3.0\" "+(serverVersion.equals("3.0")?"selected=\"true\"":"")+">SharePoint Services 3.0</option>\n"+
+"        <option value=\"2.0\" "+((serverVersion.equals("2.0"))?"selected=\"true\"":"")+">SharePoint Services 2.0 (2003)</option>\n"+
+"        <option value=\"3.0\" "+(serverVersion.equals("3.0")?"selected=\"true\"":"")+">SharePoint Services 3.0 (2007)</option>\n"+
+"        <option value=\"4.0\" "+(serverVersion.equals("4.0")?"selected=\"true\"":"")+">SharePoint Services 4.0 (2010)</option>\n"+
 "      </select>\n"+
 "    </td>\n"+
 "  </tr>\n"+

Modified: manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/common/XMLDoc.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/common/XMLDoc.java?rev=1362069&r1=1362068&r2=1362069&view=diff
==============================================================================
--- manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/common/XMLDoc.java (original)
+++ manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/common/XMLDoc.java Mon Jul 16 14:45:20 2012
@@ -241,6 +241,15 @@ public class XMLDoc
     }
   }
 
+  /** Get XML with no entity preamble */
+  public String getXMLNoEntityPreamble()
+    throws ManifoldCFException
+  {
+    String initial = getXML();
+    int index = initial.indexOf(">");
+    return initial.substring(index+1);
+  }
+  
   /** Convert the response for transmit
   * @return xml in byte array
   */