You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2015/06/07 12:02:16 UTC
svn commit: r1684015 - in /manifoldcf/trunk: ./
connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/
connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/transformation/...
Author: kwright
Date: Sun Jun 7 10:02:15 2015
New Revision: 1684015
URL: http://svn.apache.org/r1684015
Log:
Fix for CONNECTORS-1209.
Modified:
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/FieldSource.java
manifoldcf/trunk/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java
manifoldcf/trunk/connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ExpressionTest.java
Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1684015&r1=1684014&r2=1684015&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Sun Jun 7 10:02:15 2015
@@ -3,6 +3,10 @@ $Id$
======================= 2.2-dev =====================
+CONNECTORS-1209: Add regular expression extraction to
+Metadata Adjuster.
+(Karl Wright)
+
CONNECTORS-1210: List notifications page links broken.
(KOIZUMI Satoru, Karl Wright)
Modified: manifoldcf/trunk/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/FieldSource.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/FieldSource.java?rev=1684015&r1=1684014&r2=1684015&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/FieldSource.java (original)
+++ manifoldcf/trunk/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/FieldSource.java Sun Jun 7 10:02:15 2015
@@ -23,15 +23,62 @@ import org.apache.manifoldcf.agents.inte
import java.io.*;
import java.util.*;
+import java.util.regex.*;
public class FieldSource implements IDataSource {
-
+
+ protected final static int CASE_EXACT = 0;
+ protected final static int CASE_LOWER = 1;
+ protected final static int CASE_UPPER = 2;
+
protected final FieldDataFactory rd;
protected final String fieldName;
-
- public FieldSource(FieldDataFactory rd, String fieldName) {
+ protected final Pattern regExpPattern;
+ protected final int groupNumber;
+ protected final int caseSpecifier;
+
+ protected String[] cachedValue;
+
+ public FieldSource(final FieldDataFactory rd, final String fieldName, final String regExp, final String groupNumber)
+ throws ManifoldCFException {
this.rd = rd;
this.fieldName = fieldName;
+ if (regExp == null || regExp.length() == 0) {
+ regExpPattern = null;
+ this.groupNumber = 0;
+ this.caseSpecifier = CASE_EXACT;
+ } else {
+ try {
+ this.regExpPattern = Pattern.compile(regExp);
+ if (groupNumber == null || groupNumber.length() == 0) {
+ this.groupNumber = 0;
+ this.caseSpecifier = CASE_EXACT;
+ } else {
+ final StringBuilder sb = new StringBuilder();
+ int caseResult = CASE_EXACT;
+ int i = 0;
+ while (i < groupNumber.length()) {
+ final char theChar = groupNumber.charAt(i++);
+ if (theChar >= '0' && theChar <= '9')
+ sb.append(theChar);
+ else if (theChar == 'l')
+ caseResult = CASE_LOWER;
+ else if (theChar == 'u')
+ caseResult = CASE_UPPER;
+ else
+ throw new ManifoldCFException("Regular expression group specifier '"+groupNumber+"' has illegal character '"+theChar+"'; should be a number, or number + l, or number + u");
+ }
+ if (sb.length() == 0)
+ throw new ManifoldCFException("Regular expression group specifier '"+groupNumber+"' must include a number");
+ this.caseSpecifier = caseResult;
+ this.groupNumber = Integer.parseInt(sb.toString());
+ }
+ } catch (NumberFormatException e) {
+ throw new ManifoldCFException("Regular expression group specifier '"+groupNumber+"': "+e.getMessage(),e);
+ } catch (PatternSyntaxException e) {
+ throw new ManifoldCFException("Regular expression '"+regExp+"': "+e.getMessage(),e);
+ }
+ }
}
@Override
@@ -46,12 +93,46 @@ public class FieldSource implements IDat
@Override
public Object[] getRawForm()
throws IOException, ManifoldCFException {
+ if (regExpPattern != null) {
+ return calculateExtractedResult();
+ }
return rd.getField(fieldName);
}
@Override
public String[] getStringForm()
throws IOException, ManifoldCFException {
+ if (regExpPattern != null) {
+ return calculateExtractedResult();
+ }
return rd.getFieldAsStrings(fieldName);
}
+
+ protected String[] calculateExtractedResult()
+ throws IOException, ManifoldCFException {
+ if (cachedValue == null) {
+ final String[] resultSources = rd.getFieldAsStrings(fieldName);
+ final List<String> resultList = new ArrayList<String>(resultSources.length);
+ for (String x : resultSources) {
+ final Matcher m = regExpPattern.matcher(x);
+ if (m.find()) {
+ String result = x.substring(m.start(groupNumber),m.end(groupNumber));
+ switch (caseSpecifier) {
+ case CASE_LOWER:
+ result = result.toLowerCase(Locale.ROOT);
+ break;
+ case CASE_UPPER:
+ result = result.toUpperCase(Locale.ROOT);
+ break;
+ case CASE_EXACT:
+ default:
+ break;
+ }
+ resultList.add(result);
+ }
+ }
+ cachedValue = resultList.toArray(new String[0]);
+ }
+ return cachedValue;
+ }
}
Modified: manifoldcf/trunk/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java?rev=1684015&r1=1684014&r2=1684015&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java (original)
+++ manifoldcf/trunk/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java Sun Jun 7 10:02:15 2015
@@ -542,7 +542,7 @@ public class ForcedMetadataConnector ext
addition = new HashSet<String>();
additions.put(parameter,addition);
}
- addition.add(expressionEscape(value));
+ addition.add(nonExpressionEscape(value));
}
else if (sn.getType().equals(NODE_EXPRESSION))
{
@@ -619,12 +619,16 @@ public class ForcedMetadataConnector ext
paramMap.put("FILTEREMPTY",filterEmptyValue);
}
- protected static String expressionEscape(String input) {
+ /** This is used to upgrade older constant values to new ones, that won't trigger expression eval.
+ */
+ protected static String nonExpressionEscape(String input) {
// Not doing any escaping yet
return input;
}
- protected static String expressionUnescape(String input) {
+ /** This is used to unescape text that's been escaped to prevent substitution of ${} expressions.
+ */
+ protected static String nonExpressionUnescape(String input) {
// Not doing any escaping yet
return input;
}
@@ -664,23 +668,64 @@ public class ForcedMetadataConnector ext
// Look for next field specification
int field = expression.indexOf("${",index);
if (field == -1)
- return append(input, new StringSource(expressionUnescape(expression.substring(index))));
+ return append(input, new StringSource(nonExpressionUnescape(expression.substring(index))));
if (field > 0)
- input = append(input, new StringSource(expressionUnescape(expression.substring(index,field))));
- // Get the field name
- int fieldEnd = expression.indexOf("}",field);
- String fieldName;
- if (fieldEnd == -1) {
- fieldName = expression.substring(field+2);
- return append(input, new FieldSource(sourceDocument, fieldName));
+ input = append(input, new StringSource(nonExpressionUnescape(expression.substring(index,field))));
+ // Parse the field name, and regular expression (if any)
+ StringBuilder fieldNameBuffer = new StringBuilder();
+ StringBuilder regExpBuffer = new StringBuilder();
+ StringBuilder groupNumberBuffer = new StringBuilder();
+ field = parseArgument(expression, field+2, fieldNameBuffer);
+ field = parseArgument(expression, field, regExpBuffer);
+ field = parseArgument(expression, field, groupNumberBuffer);
+ int fieldEnd = parseToEnd(expression, field);
+ if (fieldEnd == expression.length()) {
+ if (fieldNameBuffer.length() > 0)
+ return append(input, new FieldSource(sourceDocument, fieldNameBuffer.toString(), regExpBuffer.toString(), groupNumberBuffer.toString()));
+ return input;
} else {
- fieldName = expression.substring(field+2,fieldEnd);
- input = append(input, new FieldSource(sourceDocument, fieldName));
- index = fieldEnd+1;
+ if (fieldNameBuffer.length() > 0)
+ input = append(input, new FieldSource(sourceDocument, fieldNameBuffer.toString(), regExpBuffer.toString(), groupNumberBuffer.toString()));
+ index = fieldEnd;
+ }
+ }
+ }
+
+ protected static int parseArgument(final String input, int start, final StringBuilder output) {
+ // Parse until we hit the end marker or an unescaped pipe symbol
+ while (true) {
+ if (input.length() == start)
+ return start;
+ char theChar = input.charAt(start);
+ if (theChar == '}')
+ return start;
+ start++;
+ if (theChar == '|')
+ return start;
+ if (theChar == '\\') {
+ if (input.length() == start)
+ return start;
+ theChar = input.charAt(start++);
}
+ output.append(theChar);
}
}
+ protected static int parseToEnd(final String input, int start) {
+ while (true) {
+ if (input.length() == start)
+ return start;
+ char theChar = input.charAt(start++);
+ if (theChar == '}')
+ return start;
+ if (theChar == '\\') {
+ if (input.length() == start)
+ return start;
+ start++;
+ }
+ }
+ }
+
protected static class SpecPacker {
private final boolean keepAllMetadata;
@@ -725,7 +770,7 @@ public class ForcedMetadataConnector ext
addition = new HashSet<String>();
additions.put(parameter,addition);
}
- addition.add(expressionEscape(value));
+ addition.add(nonExpressionEscape(value));
}
else if (sn.getType().equals(NODE_EXPRESSION))
{
Modified: manifoldcf/trunk/connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ExpressionTest.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ExpressionTest.java?rev=1684015&r1=1684014&r2=1684015&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ExpressionTest.java (original)
+++ manifoldcf/trunk/connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ExpressionTest.java Sun Jun 7 10:02:15 2015
@@ -44,6 +44,8 @@ public class ExpressionTest
// Second access of reader fields, without prior string conversion, also must work
arrayEquals(new Reader[]{new StringReader("readera"),new StringReader("readerb")}, (Reader[])(ForcedMetadataConnector.processExpression("${readerfield}", fdf).getRawForm()));
arrayEquals(new String[]{"prefixreaderapostfix","prefixreaderbpostfix"}, (String[])(ForcedMetadataConnector.processExpression("prefix${readerfield}postfix", fdf).getRawForm()));
+ arrayEquals(new String[]{"prefixapostfix","prefixbpostfix","prefixcpostfix"}, (String[])(ForcedMetadataConnector.processExpression("prefix${stringfield|string([abc])|1}postfix", fdf).getRawForm()));
+ arrayEquals(new String[]{"prefixApostfix","prefixBpostfix","prefixCpostfix"}, (String[])(ForcedMetadataConnector.processExpression("prefix${stringfield|string([abc])|1u}postfix", fdf).getRawForm()));
} finally {
fdf.close();
}