You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2015/06/07 12:03:47 UTC

svn commit: r1684016 - in /manifoldcf/branches/dev_1x: ./ connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/trans...

Author: kwright
Date: Sun Jun  7 10:03:47 2015
New Revision: 1684016

URL: http://svn.apache.org/r1684016
Log:
Pull up fix for CONNECTORS-1209.

Modified:
    manifoldcf/branches/dev_1x/   (props changed)
    manifoldcf/branches/dev_1x/CHANGES.txt
    manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/FieldSource.java
    manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java
    manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ExpressionTest.java

Propchange: manifoldcf/branches/dev_1x/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Jun  7 10:03:47 2015
@@ -124,4 +124,4 @@
 /manifoldcf/branches/CONNECTORS-981:1605049-1605773
 /manifoldcf/branches/CONNECTORS-989:1611600-1612101
 /manifoldcf/branches/CONNECTORS-990:1610284-1610707
-/manifoldcf/trunk:1660258,1660276,1661454,1665848,1666160,1666781,1666820,1668312,1669100,1669238,1669487,1669523,1669586,1669660,1670614,1670625,1670715,1671496,1672169,1672301,1672616,1672737,1673559,1673573,1673579,1673722,1675781,1675898,1676094,1676882,1676910,1678300,1678329,1678471,1678551,1679730,1679826,1681390,1681735,1682232,1682252,1682410,1682602,1682622,1682719,1683208,1683506,1683529,1683768
+/manifoldcf/trunk:1660258,1660276,1661454,1665848,1666160,1666781,1666820,1668312,1669100,1669238,1669487,1669523,1669586,1669660,1670614,1670625,1670715,1671496,1672169,1672301,1672616,1672737,1673559,1673573,1673579,1673722,1675781,1675898,1676094,1676882,1676910,1678300,1678329,1678471,1678551,1679730,1679826,1681390,1681735,1682232,1682252,1682410,1682602,1682622,1682719,1683208,1683506,1683529,1683768,1684015

Modified: manifoldcf/branches/dev_1x/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/CHANGES.txt?rev=1684016&r1=1684015&r2=1684016&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/CHANGES.txt (original)
+++ manifoldcf/branches/dev_1x/CHANGES.txt Sun Jun  7 10:03:47 2015
@@ -3,6 +3,10 @@ $Id$
 
 ======================= 1.10-dev =====================
 
+CONNECTORS-1209: Add regular expression extraction to
+Metadata Adjuster.
+(Karl Wright)
+
 CONNECTORS-1210: List notifications page links broken.
 (KOIZUMI Satoru, Karl Wright)
 

Modified: manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/FieldSource.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/FieldSource.java?rev=1684016&r1=1684015&r2=1684016&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/FieldSource.java (original)
+++ manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/FieldSource.java Sun Jun  7 10:03:47 2015
@@ -23,15 +23,62 @@ import org.apache.manifoldcf.agents.inte
 
 import java.io.*;
 import java.util.*;
+import java.util.regex.*;
 
 public class FieldSource implements IDataSource {
-    
+  
+  protected final static int CASE_EXACT = 0;
+  protected final static int CASE_LOWER = 1;
+  protected final static int CASE_UPPER = 2;
+  
   protected final FieldDataFactory rd;
   protected final String fieldName;
-    
-  public FieldSource(FieldDataFactory rd, String fieldName) {
+  protected final Pattern regExpPattern;
+  protected final int groupNumber;
+  protected final int caseSpecifier;
+
+  protected String[] cachedValue;
+
+  public FieldSource(final FieldDataFactory rd, final String fieldName, final String regExp, final String groupNumber)
+    throws ManifoldCFException {
     this.rd = rd;
     this.fieldName = fieldName;
+    if (regExp == null || regExp.length() == 0) {
+      regExpPattern = null;
+      this.groupNumber = 0;
+      this.caseSpecifier = CASE_EXACT;
+    } else {
+      try {
+        this.regExpPattern = Pattern.compile(regExp);
+        if (groupNumber == null || groupNumber.length() == 0) {
+          this.groupNumber = 0;
+          this.caseSpecifier = CASE_EXACT;
+        } else {
+          final StringBuilder sb = new StringBuilder();
+          int caseResult = CASE_EXACT;
+          int i = 0;
+          while (i < groupNumber.length()) {
+            final char theChar = groupNumber.charAt(i++);
+            if (theChar >= '0' && theChar <= '9')
+              sb.append(theChar);
+            else if (theChar == 'l')
+              caseResult = CASE_LOWER;
+            else if (theChar == 'u')
+              caseResult = CASE_UPPER;
+            else
+              throw new ManifoldCFException("Regular expression group specifier '"+groupNumber+"' has illegal character '"+theChar+"'; should be a number, or number + l, or number + u");
+          }
+          if (sb.length() == 0)
+            throw new ManifoldCFException("Regular expression group specifier '"+groupNumber+"' must include a number");
+          this.caseSpecifier = caseResult;
+          this.groupNumber = Integer.parseInt(sb.toString());
+        }
+      } catch (NumberFormatException e) {
+        throw new ManifoldCFException("Regular expression group specifier '"+groupNumber+"': "+e.getMessage(),e);
+      } catch (PatternSyntaxException e) {
+        throw new ManifoldCFException("Regular expression '"+regExp+"': "+e.getMessage(),e);
+      }
+    }
   }
     
   @Override
@@ -46,12 +93,46 @@ public class FieldSource implements IDat
   @Override
   public Object[] getRawForm()
     throws IOException, ManifoldCFException {
+    if (regExpPattern != null) {
+      return calculateExtractedResult();
+    }
     return rd.getField(fieldName);
   }
     
   @Override
   public String[] getStringForm()
     throws IOException, ManifoldCFException {
+    if (regExpPattern != null) {
+      return calculateExtractedResult();
+    }
     return rd.getFieldAsStrings(fieldName);
   }
+  
+  protected String[] calculateExtractedResult()
+    throws IOException, ManifoldCFException {
+    if (cachedValue == null) {
+      final String[] resultSources = rd.getFieldAsStrings(fieldName);
+      final List<String> resultList = new ArrayList<String>(resultSources.length);
+      for (String x : resultSources) {
+        final Matcher m = regExpPattern.matcher(x);
+        if (m.find()) {
+          String result = x.substring(m.start(groupNumber),m.end(groupNumber));
+          switch (caseSpecifier) {
+          case CASE_LOWER:
+            result = result.toLowerCase(Locale.ROOT);
+            break;
+          case CASE_UPPER:
+            result = result.toUpperCase(Locale.ROOT);
+            break;
+          case CASE_EXACT:
+          default:
+            break;
+          }
+          resultList.add(result);
+        }
+      }
+      cachedValue = resultList.toArray(new String[0]);
+    }
+    return cachedValue;
+  }
 }

Modified: manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java?rev=1684016&r1=1684015&r2=1684016&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java (original)
+++ manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java Sun Jun  7 10:03:47 2015
@@ -542,7 +542,7 @@ public class ForcedMetadataConnector ext
           addition = new HashSet<String>();
           additions.put(parameter,addition);
         }
-        addition.add(expressionEscape(value));
+        addition.add(nonExpressionEscape(value));
       }
       else if (sn.getType().equals(NODE_EXPRESSION))
       {
@@ -619,12 +619,16 @@ public class ForcedMetadataConnector ext
     paramMap.put("FILTEREMPTY",filterEmptyValue);
   }
   
-  protected static String expressionEscape(String input) {
+  /** This is used to upgrade older constant values to new ones, that won't trigger expression eval.
+  */
+  protected static String nonExpressionEscape(String input) {
     // Not doing any escaping yet
     return input;
   }
 
-  protected static String expressionUnescape(String input) {
+  /** This is used to unescape text that's been escaped to prevent substitution of ${} expressions.
+  */
+  protected static String nonExpressionUnescape(String input) {
     // Not doing any escaping yet
     return input;
   }
@@ -664,23 +668,64 @@ public class ForcedMetadataConnector ext
       // Look for next field specification
       int field = expression.indexOf("${",index);
       if (field == -1)
-        return append(input, new StringSource(expressionUnescape(expression.substring(index))));
+        return append(input, new StringSource(nonExpressionUnescape(expression.substring(index))));
       if (field > 0)
-        input = append(input, new StringSource(expressionUnescape(expression.substring(index,field))));
-      // Get the field name
-      int fieldEnd = expression.indexOf("}",field);
-      String fieldName;
-      if (fieldEnd == -1) {
-        fieldName = expression.substring(field+2);
-        return append(input, new FieldSource(sourceDocument, fieldName));
+        input = append(input, new StringSource(nonExpressionUnescape(expression.substring(index,field))));
+      // Parse the field name, and regular expression (if any)
+      StringBuilder fieldNameBuffer = new StringBuilder();
+      StringBuilder regExpBuffer = new StringBuilder();
+      StringBuilder groupNumberBuffer = new StringBuilder();
+      field = parseArgument(expression, field+2, fieldNameBuffer);
+      field = parseArgument(expression, field, regExpBuffer);
+      field = parseArgument(expression, field, groupNumberBuffer);
+      int fieldEnd = parseToEnd(expression, field);
+      if (fieldEnd == expression.length()) {
+        if (fieldNameBuffer.length() > 0)
+          return append(input, new FieldSource(sourceDocument, fieldNameBuffer.toString(), regExpBuffer.toString(), groupNumberBuffer.toString()));
+        return input;
       } else {
-        fieldName = expression.substring(field+2,fieldEnd);
-        input = append(input, new FieldSource(sourceDocument, fieldName));
-        index = fieldEnd+1;
+        if (fieldNameBuffer.length() > 0)
+          input = append(input, new FieldSource(sourceDocument, fieldNameBuffer.toString(), regExpBuffer.toString(), groupNumberBuffer.toString()));
+        index = fieldEnd;
+      }
+    }
+  }
+  
+  protected static int parseArgument(final String input, int start, final StringBuilder output) {
+    // Parse until we hit the end marker or an unescaped pipe symbol
+    while (true) {
+      if (input.length() == start)
+        return start;
+      char theChar = input.charAt(start);
+      if (theChar == '}')
+        return start;
+      start++;
+      if (theChar == '|')
+        return start;
+      if (theChar == '\\') {
+        if (input.length() == start)
+          return start;
+        theChar = input.charAt(start++);
       }
+      output.append(theChar);
     }
   }
   
+  protected static int parseToEnd(final String input, int start) {
+    while (true) {
+      if (input.length() == start)
+        return start;
+      char theChar = input.charAt(start++);
+      if (theChar == '}')
+        return start;
+      if (theChar == '\\') {
+        if (input.length() == start)
+          return start;
+        start++;
+      }
+    }
+  }
+
   protected static class SpecPacker {
     
     private final boolean keepAllMetadata;
@@ -725,7 +770,7 @@ public class ForcedMetadataConnector ext
             addition = new HashSet<String>();
             additions.put(parameter,addition);
           }
-          addition.add(expressionEscape(value));
+          addition.add(nonExpressionEscape(value));
         }
         else if (sn.getType().equals(NODE_EXPRESSION))
         {

Modified: manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ExpressionTest.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ExpressionTest.java?rev=1684016&r1=1684015&r2=1684016&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ExpressionTest.java (original)
+++ manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ExpressionTest.java Sun Jun  7 10:03:47 2015
@@ -44,6 +44,8 @@ public class ExpressionTest
       // Second access of reader fields, without prior string conversion, also must work
       arrayEquals(new Reader[]{new StringReader("readera"),new StringReader("readerb")}, (Reader[])(ForcedMetadataConnector.processExpression("${readerfield}", fdf).getRawForm()));
       arrayEquals(new String[]{"prefixreaderapostfix","prefixreaderbpostfix"}, (String[])(ForcedMetadataConnector.processExpression("prefix${readerfield}postfix", fdf).getRawForm()));
+      arrayEquals(new String[]{"prefixapostfix","prefixbpostfix","prefixcpostfix"}, (String[])(ForcedMetadataConnector.processExpression("prefix${stringfield|string([abc])|1}postfix", fdf).getRawForm()));
+      arrayEquals(new String[]{"prefixApostfix","prefixBpostfix","prefixCpostfix"}, (String[])(ForcedMetadataConnector.processExpression("prefix${stringfield|string([abc])|1u}postfix", fdf).getRawForm()));
     } finally {
       fdf.close();
     }