You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2015/06/07 12:03:47 UTC

svn commit: r1684016 - in /manifoldcf/branches/dev_1x: ./ connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/trans...

Author: kwright
Date: Sun Jun  7 10:03:47 2015
New Revision: 1684016

URL: http://svn.apache.org/r1684016
Log:
Pull up fix for CONNECTORS-1209.

Modified:
    manifoldcf/branches/dev_1x/   (props changed)
    manifoldcf/branches/dev_1x/CHANGES.txt
    manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/FieldSource.java
    manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java
    manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ExpressionTest.java

Propchange: manifoldcf/branches/dev_1x/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Jun  7 10:03:47 2015
@@ -124,4 +124,4 @@
 /manifoldcf/branches/CONNECTORS-981:1605049-1605773
 /manifoldcf/branches/CONNECTORS-989:1611600-1612101
 /manifoldcf/branches/CONNECTORS-990:1610284-1610707
-/manifoldcf/trunk:1620703,1620748,1620812,1620862,1621449,1621613,1621855,1622213,1622740,1622850,1622853-1622854,1623249,1623251,1623314,1623599,1623951,1623953-1623954,1623956,1623972,1624058,1624085,1624174,1624236,1624377,1624384,1624399,1624449,1624464,1624504,1624729-1624731,1624906,1624909-1624910,1624982,1625023,1625095,1625103,1625108,1625264,1625270,1625394,1625400,1625910,1626090,1626097,1626102,1626638-1626639,1626973,1627687,1627690,1627959,1628046,1628066,1628106,1628168,1628188,1628699,1628798,1628808,1628845,1628905,1629122,1629374-1629375,1629379,1629541,1629994,1630188,1630535,1630623,1630671,1630812,1630885,1631039,1631162,1631164,1631252,1631750,1631953,1632013,1632225,1632289,1632562,1632844,1632847,1632854,1633062-1633063,1633108,1633193,1633202,1633282,1633284,1633295,1633336,1633339,1633345,1633348,1633364,1633378,1633383,1633432,1633546,1633590,1633634,1633668,1633727,1633760,1633764,1633786,1633910,1633923,1634021,1634028,1634067,1634132,1634145,1634148,163
 4155,1634188,1634202,1634264,1634373,1634530,1634688,1634850,1634857,1635103,1635116,1635421,1635438,1635478,1635481,1635484,1635490,1635809,1635939,1636146,1636167,1636180,1636207,1636215,1636232,1636334,1636519,1636570,1636684,1636940,1637011,1637310,1637350,1637364,1637373,1637378,1639259,1639593,1639600,1640018,1640101,1640199,1640204,1640208,1640314,1640319,1640749,1640772,1640805,1640888,1640925,1640941-1640942,1641222,1641328,1641557,1641559,1641629,1641633,1641724,1641754,1641911,1642163,1642255,1642318,1642531,1642650,1642658,1642673,1642716,1644197,1644399,1644538,1644920,1644931,1646317,1646397,1646403,1646408,1646640,1646947,1647574,1647585,1647608,1648686,1648976,1649201,1649203,1649529,1649605,1649628,1649794,1650351,1650722,1650741-1650742,1650745,1650747,1650911,1650954,1651332,1651539,1651907,1651921,1652071,1652974,1653175,1653899,1654651,1655205,1655261,1655264,1655377,1655411,1655618,1655914,1657346,1657443,1658004,1658036,1658121,1658155,1658188,1658463,1658476,
 1660258,1660276,1661454,1665848,1666160,1666781,1666820,1668312,1669100,1669238,1669487,1669523,1669586,1669660,1670614,1670625,1670715,1671496,1672169,1672301,1672616,1672737,1673559,1673573,1673579,1673722,1675781,1675898,1676094,1676882,1676910,1678300,1678329,1678471,1678551,1679730,1679826,1681390,1681735,1682232,1682252,1682410,1682602,1682622,1682719,1683208,1683506,1683529,1683768
+/manifoldcf/trunk:1620703,1620748,1620812,1620862,1621449,1621613,1621855,1622213,1622740,1622850,1622853-1622854,1623249,1623251,1623314,1623599,1623951,1623953-1623954,1623956,1623972,1624058,1624085,1624174,1624236,1624377,1624384,1624399,1624449,1624464,1624504,1624729-1624731,1624906,1624909-1624910,1624982,1625023,1625095,1625103,1625108,1625264,1625270,1625394,1625400,1625910,1626090,1626097,1626102,1626638-1626639,1626973,1627687,1627690,1627959,1628046,1628066,1628106,1628168,1628188,1628699,1628798,1628808,1628845,1628905,1629122,1629374-1629375,1629379,1629541,1629994,1630188,1630535,1630623,1630671,1630812,1630885,1631039,1631162,1631164,1631252,1631750,1631953,1632013,1632225,1632289,1632562,1632844,1632847,1632854,1633062-1633063,1633108,1633193,1633202,1633282,1633284,1633295,1633336,1633339,1633345,1633348,1633364,1633378,1633383,1633432,1633546,1633590,1633634,1633668,1633727,1633760,1633764,1633786,1633910,1633923,1634021,1634028,1634067,1634132,1634145,1634148,163
 4155,1634188,1634202,1634264,1634373,1634530,1634688,1634850,1634857,1635103,1635116,1635421,1635438,1635478,1635481,1635484,1635490,1635809,1635939,1636146,1636167,1636180,1636207,1636215,1636232,1636334,1636519,1636570,1636684,1636940,1637011,1637310,1637350,1637364,1637373,1637378,1639259,1639593,1639600,1640018,1640101,1640199,1640204,1640208,1640314,1640319,1640749,1640772,1640805,1640888,1640925,1640941-1640942,1641222,1641328,1641557,1641559,1641629,1641633,1641724,1641754,1641911,1642163,1642255,1642318,1642531,1642650,1642658,1642673,1642716,1644197,1644399,1644538,1644920,1644931,1646317,1646397,1646403,1646408,1646640,1646947,1647574,1647585,1647608,1648686,1648976,1649201,1649203,1649529,1649605,1649628,1649794,1650351,1650722,1650741-1650742,1650745,1650747,1650911,1650954,1651332,1651539,1651907,1651921,1652071,1652974,1653175,1653899,1654651,1655205,1655261,1655264,1655377,1655411,1655618,1655914,1657346,1657443,1658004,1658036,1658121,1658155,1658188,1658463,1658476,
 1660258,1660276,1661454,1665848,1666160,1666781,1666820,1668312,1669100,1669238,1669487,1669523,1669586,1669660,1670614,1670625,1670715,1671496,1672169,1672301,1672616,1672737,1673559,1673573,1673579,1673722,1675781,1675898,1676094,1676882,1676910,1678300,1678329,1678471,1678551,1679730,1679826,1681390,1681735,1682232,1682252,1682410,1682602,1682622,1682719,1683208,1683506,1683529,1683768,1684015

Modified: manifoldcf/branches/dev_1x/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/CHANGES.txt?rev=1684016&r1=1684015&r2=1684016&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/CHANGES.txt (original)
+++ manifoldcf/branches/dev_1x/CHANGES.txt Sun Jun  7 10:03:47 2015
@@ -3,6 +3,10 @@ $Id$
 
 ======================= 1.10-dev =====================
 
+CONNECTORS-1209: Add regular expression extraction to
+Metadata Adjuster.
+(Karl Wright)
+
 CONNECTORS-1210: List notifications page links broken.
 (KOIZUMI Satoru, Karl Wright)
 

Modified: manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/FieldSource.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/FieldSource.java?rev=1684016&r1=1684015&r2=1684016&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/FieldSource.java (original)
+++ manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/FieldSource.java Sun Jun  7 10:03:47 2015
@@ -23,15 +23,62 @@ import org.apache.manifoldcf.agents.inte
 
 import java.io.*;
 import java.util.*;
+import java.util.regex.*;
 
 public class FieldSource implements IDataSource {
-    
+  
+  protected final static int CASE_EXACT = 0;
+  protected final static int CASE_LOWER = 1;
+  protected final static int CASE_UPPER = 2;
+  
   protected final FieldDataFactory rd;
   protected final String fieldName;
-    
-  public FieldSource(FieldDataFactory rd, String fieldName) {
+  protected final Pattern regExpPattern;
+  protected final int groupNumber;
+  protected final int caseSpecifier;
+
+  protected String[] cachedValue;
+
+  public FieldSource(final FieldDataFactory rd, final String fieldName, final String regExp, final String groupNumber)
+    throws ManifoldCFException { // Thrown for an invalid regexp or a malformed group specifier
     this.rd = rd;
     this.fieldName = fieldName;
+    if (regExp == null || regExp.length() == 0) { // No regexp given: no pattern, so no extraction is configured
+      regExpPattern = null;
+      this.groupNumber = 0;
+      this.caseSpecifier = CASE_EXACT;
+    } else {
+      try {
+        this.regExpPattern = Pattern.compile(regExp);
+        if (groupNumber == null || groupNumber.length() == 0) { // No specifier: group 0 (the whole match), case left as-is
+          this.groupNumber = 0;
+          this.caseSpecifier = CASE_EXACT;
+        } else {
+          final StringBuilder sb = new StringBuilder(); // Collects the digit characters of the specifier
+          int caseResult = CASE_EXACT;
+          int i = 0;
+          while (i < groupNumber.length()) {
+            final char theChar = groupNumber.charAt(i++);
+            if (theChar >= '0' && theChar <= '9')
+              sb.append(theChar);
+            else if (theChar == 'l') // 'l' at any position selects lowercase conversion
+              caseResult = CASE_LOWER;
+            else if (theChar == 'u') // 'u' at any position selects uppercase conversion; the last of 'l'/'u' seen wins
+              caseResult = CASE_UPPER;
+            else
+              throw new ManifoldCFException("Regular expression group specifier '"+groupNumber+"' has illegal character '"+theChar+"'; should be a number, or number + l, or number + u");
+          }
+          if (sb.length() == 0)
+            throw new ManifoldCFException("Regular expression group specifier '"+groupNumber+"' must include a number");
+          this.caseSpecifier = caseResult;
+          this.groupNumber = Integer.parseInt(sb.toString()); // NOTE(review): not validated against the pattern's group count here
+        }
+      } catch (NumberFormatException e) { // sb holds only ASCII digits, so this can only mean the number overflows an int
+        throw new ManifoldCFException("Regular expression group specifier '"+groupNumber+"': "+e.getMessage(),e);
+      } catch (PatternSyntaxException e) { // Raised by Pattern.compile for an invalid regexp
+        throw new ManifoldCFException("Regular expression '"+regExp+"': "+e.getMessage(),e);
+      }
+    }
   }
     
   @Override
@@ -46,12 +93,46 @@ public class FieldSource implements IDat
   @Override
   public Object[] getRawForm()
     throws IOException, ManifoldCFException {
+    if (regExpPattern != null) { // Extraction configured: the result is the extracted strings, not the raw field objects
+      return calculateExtractedResult();
+    }
     return rd.getField(fieldName); // No regexp: return whatever the factory holds for this field
   }
     
   @Override
   public String[] getStringForm()
     throws IOException, ManifoldCFException {
+    if (regExpPattern != null) { // Extraction configured: same (cached) extracted strings that getRawForm() returns
+      return calculateExtractedResult();
+    }
     return rd.getFieldAsStrings(fieldName); // No regexp: the field's values in string form, unmodified
   }
+  
+  /** Extract the configured regexp group from each value of the field; the result is cached.
+   * Values the pattern does not match, or whose group did not participate, are skipped.
+   * @throws ManifoldCFException if the group number exceeds the pattern's group count. */
+  protected String[] calculateExtractedResult() throws IOException, ManifoldCFException {
+    if (cachedValue != null)
+      return cachedValue; // Already computed by an earlier call
+    final String[] resultSources = rd.getFieldAsStrings(fieldName);
+    final List<String> resultList = new ArrayList<String>(resultSources.length);
+    for (final String x : resultSources) {
+      final Matcher m = regExpPattern.matcher(x);
+      if (!m.find())
+        continue;
+      if (groupNumber > m.groupCount())
+        throw new ManifoldCFException("Regular expression group number "+groupNumber+" exceeds the "+m.groupCount()+" group(s) in '"+regExpPattern.pattern()+"'");
+      final String extracted = m.group(groupNumber); // null if this group did not participate in the match
+      if (extracted == null)
+        continue;
+      if (caseSpecifier == CASE_LOWER)
+        resultList.add(extracted.toLowerCase(Locale.ROOT));
+      else if (caseSpecifier == CASE_UPPER)
+        resultList.add(extracted.toUpperCase(Locale.ROOT));
+      else
+        resultList.add(extracted);
+    }
+    cachedValue = resultList.toArray(new String[0]);
+    return cachedValue;
+  }
 }

Modified: manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java?rev=1684016&r1=1684015&r2=1684016&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java (original)
+++ manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java Sun Jun  7 10:03:47 2015
@@ -542,7 +542,7 @@ public class ForcedMetadataConnector ext
           addition = new HashSet<String>();
           additions.put(parameter,addition);
         }
-        addition.add(expressionEscape(value));
+        addition.add(nonExpressionEscape(value));
       }
       else if (sn.getType().equals(NODE_EXPRESSION))
       {
@@ -619,12 +619,16 @@ public class ForcedMetadataConnector ext
     paramMap.put("FILTEREMPTY",filterEmptyValue);
   }
   
-  protected static String expressionEscape(String input) {
+  /** Escapes an older-style constant value so that it will not trigger expression evaluation. Currently returns the input unchanged.
+  */
+  protected static String nonExpressionEscape(String input) {
     // Not doing any escaping yet
     return input;
   }
 
-  protected static String expressionUnescape(String input) {
+  /** Reverses the escaping applied to literal text to prevent substitution of ${} expressions. Currently returns the input unchanged.
+  */
+  protected static String nonExpressionUnescape(String input) {
     // Not doing any escaping yet
     return input;
   }
@@ -664,23 +668,64 @@ public class ForcedMetadataConnector ext
       // Look for next field specification
       int field = expression.indexOf("${",index);
       if (field == -1)
-        return append(input, new StringSource(expressionUnescape(expression.substring(index))));
+        return append(input, new StringSource(nonExpressionUnescape(expression.substring(index))));
       if (field > 0)
-        input = append(input, new StringSource(expressionUnescape(expression.substring(index,field))));
-      // Get the field name
-      int fieldEnd = expression.indexOf("}",field);
-      String fieldName;
-      if (fieldEnd == -1) {
-        fieldName = expression.substring(field+2);
-        return append(input, new FieldSource(sourceDocument, fieldName));
+        input = append(input, new StringSource(nonExpressionUnescape(expression.substring(index,field))));
+      // Parse the field name, and regular expression (if any)
+      StringBuilder fieldNameBuffer = new StringBuilder();
+      StringBuilder regExpBuffer = new StringBuilder();
+      StringBuilder groupNumberBuffer = new StringBuilder();
+      field = parseArgument(expression, field+2, fieldNameBuffer);
+      field = parseArgument(expression, field, regExpBuffer);
+      field = parseArgument(expression, field, groupNumberBuffer);
+      int fieldEnd = parseToEnd(expression, field);
+      if (fieldEnd == expression.length()) {
+        if (fieldNameBuffer.length() > 0)
+          return append(input, new FieldSource(sourceDocument, fieldNameBuffer.toString(), regExpBuffer.toString(), groupNumberBuffer.toString()));
+        return input;
       } else {
-        fieldName = expression.substring(field+2,fieldEnd);
-        input = append(input, new FieldSource(sourceDocument, fieldName));
-        index = fieldEnd+1;
+        if (fieldNameBuffer.length() > 0)
+          input = append(input, new FieldSource(sourceDocument, fieldNameBuffer.toString(), regExpBuffer.toString(), groupNumberBuffer.toString()));
+        index = fieldEnd;
+      }
+    }
+  }
+  
+  protected static int parseArgument(final String input, int start, final StringBuilder output) { // Reads one argument into output; returns the index at which parsing should resume
+    // Parse until we hit the end marker or an unescaped pipe symbol
+    while (true) {
+      if (input.length() == start) // Ran off the end of the input without seeing a terminator
+        return start;
+      char theChar = input.charAt(start);
+      if (theChar == '}') // End marker: NOT consumed, so the returned index still points at it
+        return start;
+      start++;
+      if (theChar == '|') // Separator: consumed, so the returned index is just past it
+        return start;
+      if (theChar == '\\') { // Backslash: the following character is appended literally, even if it is a pipe or end marker
+        if (input.length() == start) // Trailing backslash at end of input: dropped, nothing appended
+          return start;
+        theChar = input.charAt(start++);
       }
+      output.append(theChar);
     }
   }
   
+  protected static int parseToEnd(final String input, int start) { // Returns the index just past the next unescaped end marker, or input.length() if there is none
+    while (true) {
+      if (input.length() == start) // No end marker before the end of the input
+        return start;
+      char theChar = input.charAt(start++);
+      if (theChar == '}') // Found the end marker; start already points past it
+        return start;
+      if (theChar == '\\') { // Backslash: skip the escaped character so an escaped end marker does not stop the scan
+        if (input.length() == start) // Trailing backslash at end of input
+          return start;
+        start++;
+      }
+    }
+  }
+
   protected static class SpecPacker {
     
     private final boolean keepAllMetadata;
@@ -725,7 +770,7 @@ public class ForcedMetadataConnector ext
             addition = new HashSet<String>();
             additions.put(parameter,addition);
           }
-          addition.add(expressionEscape(value));
+          addition.add(nonExpressionEscape(value));
         }
         else if (sn.getType().equals(NODE_EXPRESSION))
         {

Modified: manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ExpressionTest.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ExpressionTest.java?rev=1684016&r1=1684015&r2=1684016&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ExpressionTest.java (original)
+++ manifoldcf/branches/dev_1x/connectors/forcedmetadata/connector/src/test/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ExpressionTest.java Sun Jun  7 10:03:47 2015
@@ -44,6 +44,8 @@ public class ExpressionTest
       // Second access of reader fields, without prior string conversion, also must work
       arrayEquals(new Reader[]{new StringReader("readera"),new StringReader("readerb")}, (Reader[])(ForcedMetadataConnector.processExpression("${readerfield}", fdf).getRawForm()));
       arrayEquals(new String[]{"prefixreaderapostfix","prefixreaderbpostfix"}, (String[])(ForcedMetadataConnector.processExpression("prefix${readerfield}postfix", fdf).getRawForm()));
+      arrayEquals(new String[]{"prefixapostfix","prefixbpostfix","prefixcpostfix"}, (String[])(ForcedMetadataConnector.processExpression("prefix${stringfield|string([abc])|1}postfix", fdf).getRawForm()));
+      arrayEquals(new String[]{"prefixApostfix","prefixBpostfix","prefixCpostfix"}, (String[])(ForcedMetadataConnector.processExpression("prefix${stringfield|string([abc])|1u}postfix", fdf).getRawForm()));
     } finally {
       fdf.close();
     }