You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2012/07/10 14:24:20 UTC
svn commit: r1359633 - in
/manifoldcf/branches/CONNECTORS-470/connectors/sharepoint/connector/src:
main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/
test/java/org/apache/manifoldcf/crawler/connectors/sharepoint/
Author: kwright
Date: Tue Jul 10 12:24:19 2012
New Revision: 1359633
URL: http://svn.apache.org/viewvc?rev=1359633&view=rev
Log:
Add the code that pages through items
Modified:
manifoldcf/branches/CONNECTORS-470/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
manifoldcf/branches/CONNECTORS-470/connectors/sharepoint/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/sharepoint/XMLGenTest.java
Modified: manifoldcf/branches/CONNECTORS-470/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-470/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java?rev=1359633&r1=1359632&r2=1359633&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-470/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java (original)
+++ manifoldcf/branches/CONNECTORS-470/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java Tue Jul 10 12:24:19 2012
@@ -43,6 +43,7 @@ import org.apache.axis.EngineConfigurati
import javax.xml.namespace.QName;
+import org.apache.axis.message.MessageElement;
import org.apache.axis.AxisEngine;
import org.apache.axis.ConfigurationException;
import org.apache.axis.Handler;
@@ -609,94 +610,112 @@ public class SPSProxyHelper {
// Sharepoint 2010; use Lists service instead
ListsWS lservice = new ListsWS(baseUrl + site, userName, password, myFactory, configuration, connectionManager );
ListsSoapStub stub1 = (ListsSoapStub)lservice.getListsSoapHandler();
- GetListItemsQuery q = new GetListItemsQuery();
-
- // TODO: 5000 is obviously not a reasonable limit. What IS a reasonable limit?
- GetListItemsResponseGetListItemsResult items = stub1.getListItems(docLibrary, "", null, null, "5000", null, site);
- if (items == null)
- return false;
- org.apache.axis.message.MessageElement[] list = items.get_any();
+ // This string is the paging chunk description. It gets updated on every chunk we do,
+ // so that the next call finds new data.
+ String nextChunkDescription = "";
+ // Order by some column we know is indexed.
+ GetListItemsQuery orderByQuery = buildOrderedQuery("ID");
+ // Set up fields we want
+ ArrayList fieldList = new ArrayList();
+ fieldList.add("FileRef");
+ fieldList.add("ProgId");
+ GetListItemsViewFields viewFields = buildViewFields(fieldList);
+ // Pick a request size we know will not exceed the limit as set by the administrator.
+ int requestSize = 2000;
+
+ while (true)
+ {
+ GetListItemsResponseGetListItemsResult items = stub1.getListItems(docLibrary, "", orderByQuery, viewFields, Integer.toString(requestSize), buildPagingQueryOptions(nextChunkDescription), site);
+ if (items == null)
+ return false;
- if (Logging.connectors.isInfoEnabled()){
- Logging.connectors.info("SharePoint: getListItems xml response: '" + list[0].toString() + "'");
- }
+ org.apache.axis.message.MessageElement[] list = items.get_any();
- ArrayList nodeList = new ArrayList();
- XMLDoc doc = new XMLDoc(list[0].toString());
+ if (Logging.connectors.isInfoEnabled()){
+ Logging.connectors.info("SharePoint: getListItems xml response: '" + list[0].toString() + "'");
+ }
+ ArrayList nodeList = new ArrayList();
+ XMLDoc doc = new XMLDoc(list[0].toString());
- doc.processPath(nodeList, "*", null);
- if (nodeList.size() != 1) {
- throw new ManifoldCFException("Bad xml - missing outer 'ns1:listitems' node - there are " + Integer.toString(nodeList.size()) + " nodes");
- }
+ doc.processPath(nodeList, "*", null);
+ if (nodeList.size() != 1) {
+ throw new ManifoldCFException("Bad xml - expecting one outer 'ns1:listitems' node - there are " + Integer.toString(nodeList.size()) + " nodes");
+ }
- Object parent = nodeList.get(0);
- if (!"ns1:listitems".equals(doc.getNodeName(parent)))
+ Object parent = nodeList.get(0);
+ if (!"ns1:listitems".equals(doc.getNodeName(parent)))
throw new ManifoldCFException("Bad xml - outer node is not 'ns1:listitems'");
- nodeList.clear();
- doc.processPath(nodeList, "*", parent);
- // System.out.println(nodeList.size());
+ nodeList.clear();
+ doc.processPath(nodeList, "*", parent);
- if (nodeList.size() != 1) {
- throw new ManifoldCFException("No results found.");
- }
+ if (nodeList.size() != 1)
+ throw new ManifoldCFException("Expected rsdata result but no results found.");
- Object rsData = nodeList.get(0);
+ Object rsData = nodeList.get(0);
- int itemCount = Integer.parseInt(doc.getValue(rsData, "ItemCount"));
- // System.out.println("ItemCount = " + itemCount);
+ // Get the chunk description
+ nextChunkDescription = doc.getValue(rsData, "ListItemCollectionPositionNext");
+ if (nextChunkDescription == null)
+ throw new ManifoldCFException("Expected rsdata to have attribute 'ListItemCollectionPositionNext'");
- // Now, extract the files from the response document
- XMLDoc docs = doc;
- ArrayList nodeDocs = new ArrayList();
+ int itemCount = Integer.parseInt(doc.getValue(rsData, "ItemCount"));
- docs.processPath(nodeDocs, "*", rsData);
+ // Now, extract the files from the response document
+ XMLDoc docs = doc;
+ ArrayList nodeDocs = new ArrayList();
- if (nodeDocs.size() != itemCount) {
- throw new ManifoldCFException("itemCount does not match with nodeDocs.size().");
- }
+ docs.processPath(nodeDocs, "*", rsData);
- for (int j = 0; j < nodeDocs.size(); j++)
- {
+ if (nodeDocs.size() != itemCount) {
+ throw new ManifoldCFException("itemCount does not match with nodeDocs.size().");
+ }
- Object node = nodeDocs.get(j);
+ for (int j = 0; j < nodeDocs.size(); j++)
+ {
- String relPath = doc.getValue(node, "ows_FileRef");
- String ows_ProgId = doc.getValue(node, "ows_ProgId");
+ Object node = nodeDocs.get(j);
- // This relative path is apparently from the domain on down; if there's a location offset we therefore
- // need to get rid of it before checking the document against the site/library tuples. The recorded
- // document identifier should also not include it.
+ String relPath = doc.getValue(node, "ows_FileRef");
+ String ows_ProgId = doc.getValue(node, "ows_ProgId");
- // KDW: Removed the case changes; URL characters should remain case-sensitive
- if (!relPath.startsWith(serverLocation))
- {
- // Unexpected processing error; the path to the folder or document did not start with the location
- // offset, so throw up.
- throw new ManifoldCFException("Internal error: Relative path '"+relPath+"' was expected to start with '"+
- serverLocation+"'");
- }
+ // This relative path is apparently from the domain on down; if there's a location offset we therefore
+ // need to get rid of it before checking the document against the site/library tuples. The recorded
+ // document identifier should also not include it.
- relPath = relPath.substring(serverLocation.length());
+ // KDW: Removed the case changes; URL characters should remain case-sensitive
+ if (!relPath.startsWith(serverLocation))
+ {
+ // Unexpected processing error; the path to the folder or document did not start with the location
+ // offset, so throw up.
+ throw new ManifoldCFException("Internal error: Relative path '"+relPath+"' was expected to start with '"+
+ serverLocation+"'");
+ }
- /**
- * ows_FileRef starts with ows_ProgId.
- * Replace ows_ProgId with "/".
- * E.g. ows_FileRef="1;#Documents/ik_docs" ows_ProgId="1;#" => relPah="/Documents/ik_docs"
- */
- if (relPath.startsWith(ows_ProgId)) {
- relPath = "/" + relPath.substring(ows_ProgId.length());
- }
+ relPath = relPath.substring(serverLocation.length());
- if (!relPath.endsWith(".aspx")) {
- fileStream.addFile( relPath );
+ /**
+ * ows_FileRef starts with ows_ProgId.
+ * Replace ows_ProgId with "/".
+ * E.g. ows_FileRef="1;#Documents/ik_docs" ows_ProgId="1;#" => relPah="/Documents/ik_docs"
+ */
+ if (relPath.startsWith(ows_ProgId)) {
+ relPath = "/" + relPath.substring(ows_ProgId.length());
+ }
+
+ if (!relPath.endsWith(".aspx")) {
+ fileStream.addFile( relPath );
+ }
}
+
+ if (requestSize > nodeDocs.size())
+ break;
}
}
-
+
return true;
}
catch (java.net.MalformedURLException e)
@@ -2017,63 +2036,102 @@ public class SPSProxyHelper {
/** Build viewFields XML for the ListItems call.
*/
- protected static String buildViewFields(ArrayList fieldNames)
+ protected static GetListItemsViewFields buildViewFields(ArrayList fieldNames)
throws ManifoldCFException
{
- XMLDoc doc = new XMLDoc();
- Object viewFieldsNode = doc.createElement(null,"viewFields");
- for (Object x : fieldNames)
+ try
+ {
+ GetListItemsViewFields rval = new GetListItemsViewFields();
+ MessageElement viewFieldsNode = new MessageElement((String)null,"ViewFields");
+ rval.set_any(new MessageElement[]{viewFieldsNode});
+ for (Object x : fieldNames)
+ {
+ MessageElement child = new MessageElement((String)null,"FieldRef");
+ viewFieldsNode.addChild(child);
+ child.addAttribute(null,"Name",(String)x);
+ }
+ return rval;
+ }
+ catch (javax.xml.soap.SOAPException e)
{
- Object child = doc.createElement(viewFieldsNode,"FieldRef");
- doc.setAttribute(child,"Name",(String)x);
+ throw new ManifoldCFException(e.getMessage(),e);
}
- return doc.getXMLNoEntityPreamble();
}
/** Build a query XML object that matches a specified field and value pair.
*/
- protected static String buildMatchQuery(String fieldName, String type, String value)
+ protected static GetListItemsQuery buildMatchQuery(String fieldName, String type, String value)
throws ManifoldCFException
{
- XMLDoc doc = new XMLDoc();
- Object queryNode = doc.createElement(null,"Query");
- Object whereClause = doc.createElement(queryNode,"Where");
- Object equalsClause = doc.createElement(whereClause,"Eq");
- Object fieldRefClause = doc.createElement(equalsClause,"FieldRef");
- doc.setAttribute(fieldRefClause,"Name",fieldName);
- Object valueClause = doc.createElement(equalsClause,"Value");
- doc.setAttribute(valueClause,"Type",type);
- doc.createText(valueClause,value);
- return doc.getXMLNoEntityPreamble();
+ try
+ {
+ GetListItemsQuery rval = new GetListItemsQuery();
+ MessageElement queryNode = new MessageElement((String)null,"Query");
+ rval.set_any(new MessageElement[]{queryNode});
+ MessageElement whereNode = new MessageElement((String)null,"Where");
+ queryNode.addChild(whereNode);
+ MessageElement eqNode = new MessageElement((String)null,"Eq");
+ whereNode.addChild(eqNode);
+ MessageElement fieldRefNode = new MessageElement((String)null,"FieldRef");
+ eqNode.addChild(fieldRefNode);
+ fieldRefNode.addAttribute(null,"Name",fieldName);
+ MessageElement valueNode = new MessageElement((String)null,"Value");
+ eqNode.addChild(valueNode);
+ valueNode.addAttribute(null,"Type",type);
+ valueNode.addTextNode(value);
+ return rval;
+ }
+ catch (javax.xml.soap.SOAPException e)
+ {
+ throw new ManifoldCFException(e.getMessage(),e);
+ }
}
/** Build a query XML object that orders by an indexed column, for paging.
*/
- protected static String buildOrderedQuery(String indexedColumn)
+ protected static GetListItemsQuery buildOrderedQuery(String indexedColumn)
throws ManifoldCFException
{
- XMLDoc doc = new XMLDoc();
- Object queryNode = doc.createElement(null,"Query");
- Object orderByNode = doc.createElement(queryNode,"OrderBy");
- doc.setAttribute(orderByNode,"Override","TRUE");
- doc.setAttribute(orderByNode,"UseIndexForOrderBy","TRUE");
- Object fieldRefNode = doc.createElement(orderByNode,"FieldRef");
- doc.setAttribute(fieldRefNode,"Ascending","TRUE");
- doc.setAttribute(fieldRefNode,"Name",indexedColumn);
- return doc.getXMLNoEntityPreamble();
+ try
+ {
+ GetListItemsQuery rval = new GetListItemsQuery();
+ MessageElement queryNode = new MessageElement((String)null,"Query");
+ rval.set_any(new MessageElement[]{queryNode});
+ MessageElement orderByNode = new MessageElement((String)null,"OrderBy");
+ queryNode.addChild(orderByNode);
+ orderByNode.addAttribute(null,"Override","TRUE");
+ orderByNode.addAttribute(null,"UseIndexForOrderBy","TRUE");
+ MessageElement fieldRefNode = new MessageElement((String)null,"FieldRef");
+ orderByNode.addChild(fieldRefNode);
+ fieldRefNode.addAttribute(null,"Ascending","TRUE");
+ fieldRefNode.addAttribute(null,"Name",indexedColumn);
+ return rval;
+ }
+ catch (javax.xml.soap.SOAPException e)
+ {
+ throw new ManifoldCFException(e.getMessage(),e);
+ }
}
/** Build queryOptions XML object that specifies a paging value.
*/
- protected static String buildPagingQueryOptions(String pageNextString)
+ protected static GetListItemsQueryOptions buildPagingQueryOptions(String pageNextString)
throws ManifoldCFException
{
- XMLDoc doc = new XMLDoc();
- Object optionsNode = doc.createElement(null,"QueryOptions");
- Object pagingNode = doc.createElement(optionsNode,"Paging");
- doc.setAttribute(pagingNode,"ListItemCollectionPositionNext",
- pageNextString);
- return doc.getXMLNoEntityPreamble();
+ try
+ {
+ GetListItemsQueryOptions rval = new GetListItemsQueryOptions();
+ MessageElement queryOptionsNode = new MessageElement((String)null,"QueryOptions");
+ rval.set_any(new MessageElement[]{queryOptionsNode});
+ MessageElement pagingNode = new MessageElement((String)null,"Paging");
+ queryOptionsNode.addChild(pagingNode);
+ pagingNode.addAttribute(null,"ListItemCollectionPositionNext",pageNextString);
+ return rval;
+ }
+ catch (javax.xml.soap.SOAPException e)
+ {
+ throw new ManifoldCFException(e.getMessage(),e);
+ }
}
/**
Modified: manifoldcf/branches/CONNECTORS-470/connectors/sharepoint/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/sharepoint/XMLGenTest.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-470/connectors/sharepoint/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/sharepoint/XMLGenTest.java?rev=1359633&r1=1359632&r2=1359633&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-470/connectors/sharepoint/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/sharepoint/XMLGenTest.java (original)
+++ manifoldcf/branches/CONNECTORS-470/connectors/sharepoint/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/sharepoint/XMLGenTest.java Tue Jul 10 12:24:19 2012
@@ -30,7 +30,7 @@ public class XMLGenTest
public void buildOrderedQueryTest()
throws Exception
{
- String orderedQuery = SPSProxyHelper.buildOrderedQuery("ID");
+ String orderedQuery = SPSProxyHelper.buildOrderedQuery("ID").get_any()[0].toString();
assertEquals("<Query><OrderBy Override=\"TRUE\" UseIndexForOrderBy=\"TRUE\"><FieldRef Ascending=\"TRUE\" Name=\"ID\"/></OrderBy></Query>",orderedQuery);
}
@@ -38,7 +38,7 @@ public class XMLGenTest
public void buildPagingQueryOptionsTest()
throws Exception
{
- String pagingXML = SPSProxyHelper.buildPagingQueryOptions("some next string");
+ String pagingXML = SPSProxyHelper.buildPagingQueryOptions("some next string").get_any()[0].toString();
assertEquals("<QueryOptions><Paging ListItemCollectionPositionNext=\"some next string\"/></QueryOptions>",pagingXML);
}
@@ -49,15 +49,15 @@ public class XMLGenTest
ArrayList list = new ArrayList();
list.add("foo");
list.add("bar");
- String viewFieldsXML = SPSProxyHelper.buildViewFields(list);
- assertEquals("<viewFields><FieldRef Name=\"foo\"/><FieldRef Name=\"bar\"/></viewFields>",viewFieldsXML);
+ String viewFieldsXML = SPSProxyHelper.buildViewFields(list).get_any()[0].toString();
+ assertEquals("<ViewFields><FieldRef Name=\"foo\"/><FieldRef Name=\"bar\"/></ViewFields>",viewFieldsXML);
}
@Test
public void buildMatchQueryTest()
throws Exception
{
- String matchQuery = SPSProxyHelper.buildMatchQuery("foo","Text","bar");
+ String matchQuery = SPSProxyHelper.buildMatchQuery("foo","Text","bar").get_any()[0].toString();
assertEquals("<Query><Where><Eq><FieldRef Name=\"foo\"/><Value Type=\"Text\">bar</Value></Eq></Where></Query>",matchQuery);
}