You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/12/30 15:27:02 UTC
svn commit: r1648535 -
/manifoldcf/branches/CONNECTORS-1130/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java
Author: kwright
Date: Tue Dec 30 14:27:02 2014
New Revision: 1648535
URL: http://svn.apache.org/r1648535
Log:
Hook up filtering functionality
Modified:
manifoldcf/branches/CONNECTORS-1130/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java
Modified: manifoldcf/branches/CONNECTORS-1130/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1130/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java?rev=1648535&r1=1648534&r2=1648535&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1130/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java (original)
+++ manifoldcf/branches/CONNECTORS-1130/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java Tue Dec 30 14:27:02 2014
@@ -1052,13 +1052,12 @@ public class DCTM extends org.apache.man
// First, build the query
- int i = 0;
StringBuilder strLocationsClause = new StringBuilder();
- ArrayList tokenList = new ArrayList();
- ArrayList contentList = null;
+ Map<String,Map<String,Map<String,Set<String>>>> tokenList = new HashMap<String,Map<String,Map<String,Set<String>>>>();
+ List<String> contentList = null;
String maxSize = null;
- while (i < spec.getChildCount())
+ for (int i = 0; i < spec.getChildCount(); i++)
{
SpecificationNode n = spec.getChild(i);
if (n.getType().equals(CONFIG_PARAM_LOCATION))
@@ -1079,13 +1078,42 @@ public class DCTM extends org.apache.man
else if (n.getType().equals(CONFIG_PARAM_OBJECTTYPE))
{
String objType = n.getAttributeValue("token");
- tokenList.add(objType);
+ Map<String,Map<String,Set<String>>> filters = tokenList.get(objType);
+ if (filters == null)
+ {
+ filters = new HashMap<String,Map<String,Set<String>>>();
+ tokenList.put(objType,filters);
+ }
+ // Go through children and pick out filters
+ for (int j = 0; j < n.getChildCount(); j++)
+ {
+ SpecificationNode sn = n.getChild(j);
+ if (sn.getType().equals(CONFIG_PARAM_FILTER))
+ {
+ String attributeName = sn.getAttributeValue("name");
+ String operation = sn.getAttributeValue("op");
+ String value = sn.getAttributeValue("value");
+ Map<String,Set<String>> operations = filters.get(attributeName);
+ if (operations == null)
+ {
+ operations = new HashMap<String,Set<String>>();
+ filters.put(attributeName,operations);
+ }
+ Set<String> values = operations.get(operation);
+ if (values == null)
+ {
+ values = new HashSet<String>();
+ operations.put(operation,values);
+ }
+ values.add(value);
+ }
+ }
}
else if (n.getType().equals(CONFIG_PARAM_FORMAT))
{
String docType = n.getAttributeValue("value");
if (contentList == null)
- contentList = new ArrayList();
+ contentList = new ArrayList<String>();
contentList.add(docType);
}
else if (n.getType().equals(CONFIG_PARAM_MAXLENGTH))
@@ -1093,13 +1121,12 @@ public class DCTM extends org.apache.man
maxSize = n.getAttributeValue("value");
}
- i++;
}
if (tokenList.size() == 0)
{
Logging.connectors.debug("DCTM: No ObjectType found in Document Spec. Setting it to dm_document");
- tokenList.add("dm_document");
+ tokenList.put("dm_document",new HashMap<String,Map<String,Set<String>>>());
}
if (strLocationsClause.length() < 1)
@@ -1133,14 +1160,15 @@ public class DCTM extends org.apache.man
strDQLend.append(" AND 1<0");
else
{
- i = 0;
strDQLend.append(" AND a_content_type IN (");
- while (i < dctmTypes.length)
+ boolean commaNeeded = false;
+ for (String cType : dctmTypes)
{
- if (i > 0)
+ if (commaNeeded)
strDQLend.append(",");
- String cType = dctmTypes[i++];
- strDQLend.append("'").append(cType).append("'");
+ else
+ commaNeeded = true;
+ strDQLend.append(quoteDQLString(cType));
}
strDQLend.append(")");
}
@@ -1156,12 +1184,56 @@ public class DCTM extends org.apache.man
}
// Now, loop through the documents and queue them up.
- int tokenIndex = 0;
- while (tokenIndex < tokenList.size())
+ for (String tokenValue : tokenList.keySet())
{
activities.checkJobStillActive();
- String tokenValue = (String)tokenList.get(tokenIndex);
- String strDQL = strDQLstart + tokenValue + strDQLend;
+
+ // Construct the filter part of the DQL query
+ Map<String,Map<String,Set<String>>> filters = tokenList.get(tokenValue);
+
+ StringBuilder filterPart = new StringBuilder();
+ // For each attribute, go through the operations and emit an AND clause
+ for (String attributeName : filters.keySet())
+ {
+ filterPart.append(" AND ");
+ Map<String,Set<String>> operations = filters.get(attributeName);
+ for (String operation : operations.keySet())
+ {
+ Set<String> values = operations.get(operation);
+ if (operation.equals("="))
+ {
+ filterPart.append("\"").append(attributeName).append("\"").append(" IN (");
+ boolean commaNeeded = false;
+ for (String value : values)
+ {
+ if (commaNeeded)
+ filterPart.append(",");
+ else
+ commaNeeded = true;
+ filterPart.append(quoteDQLString(value));
+ }
+ filterPart.append(")");
+ }
+ else if (operation.equals("<>"))
+ {
+ filterPart.append("(");
+ boolean andNeeded = false;
+ for (String value : values)
+ {
+ if (andNeeded)
+ filterPart.append(" AND ");
+ else
+ andNeeded = true;
+ filterPart.append("\"").append(attributeName).append("\"").append("<>").append(quoteDQLString(value));
+ }
+ filterPart.append(")");
+ }
+ else
+ throw new ManifoldCFException("Unrecognized operation: "+operation);
+ }
+ }
+
+ String strDQL = strDQLstart + tokenValue + strDQLend + filterPart;
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("DCTM: About to execute query= (" + strDQL + ")");
while (true)
@@ -1191,7 +1263,6 @@ public class DCTM extends org.apache.man
activities.addSeedDocument(next);
}
t.finishUp();
- tokenIndex++;
// Go on to next document type and repeat
break;
}
@@ -1256,25 +1327,33 @@ public class DCTM extends org.apache.man
}
/** Do a query and read back the name column */
- protected static String[] convertToDCTMTypes(ArrayList contentList)
+ protected static String[] convertToDCTMTypes(List<String> contentList)
throws ManifoldCFException, ServiceInterruption
{
if (contentList != null && contentList.size() > 0)
{
// The contentList has type names.
- String[] rval = new String[contentList.size()];
- int i = 0;
- while (i < rval.length)
- {
- rval[i] = (String)contentList.get(i);
- i++;
- }
- return rval;
+ return contentList.toArray(new String[0]);
}
return null;
}
+ protected static String quoteDQLString(String value)
+ {
+ StringBuilder sb = new StringBuilder();
+ sb.append("'");
+ for (int i = 0; i < value.length(); i++)
+ {
+ char x = value.charAt(i);
+ if (x == '\'')
+ sb.append("'");
+ sb.append(x);
+ }
+ sb.append("'");
+ return sb.toString();
+ }
+
protected class ProcessDocumentThread extends Thread
{
// Initial data