You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/09 02:27:55 UTC

svn commit: r1630261 - in /manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter: DocumentFilter.java DocumentFilterConfig.java

Author: kwright
Date: Thu Oct  9 00:27:55 2014
New Revision: 1630261

URL: http://svn.apache.org/r1630261
Log:
Add wildcards for extension and mimetype filtering

Modified:
    manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java
    manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java

Modified: manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java?rev=1630261&r1=1630260&r2=1630261&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java (original)
+++ manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java Thu Oct  9 00:27:55 2014
@@ -375,7 +375,8 @@ public class DocumentFilter extends org.
     
   }
   
-  protected static void fillSet(Set<String> set, String input) {
+  protected static Set<String> fillSet(String input) {
+    Set<String> rval = new HashSet<String>();
     try
     {
       StringReader sr = new StringReader(input);
@@ -384,8 +385,10 @@ public class DocumentFilter extends org.
       while ((line = br.readLine()) != null)
       {
         line = line.trim();
-        if (line.length() > 0)
-          set.add(line.toLowerCase(Locale.ROOT));
+        if (line.equals("*"))
+          rval = null;
+        else if (rval != null && line.length() > 0)
+          rval.add(line.toLowerCase(Locale.ROOT));
       }
     }
     catch (IOException e)
@@ -393,12 +396,15 @@ public class DocumentFilter extends org.
       // Should never happen
       throw new RuntimeException("IO exception reading strings: "+e.getMessage(),e);
     }
+    return rval;
   }
   
   protected static class SpecPacker {
     
-    private final Set<String> extensions = new HashSet<String>();
-    private final Set<String> mimeTypes = new HashSet<String>();
+    // null means "match everything"
+    private final Set<String> extensions;
+    // null means "match everything"
+    private final Set<String> mimeTypes;
     private final Long minLength;
     private final Long lengthCutoff;
     
@@ -424,8 +430,8 @@ public class DocumentFilter extends org.
       }
       this.minLength = minLength;
       this.lengthCutoff = lengthCutoff;
-      fillSet(this.extensions, extensions);
-      fillSet(this.mimeTypes, mimeTypes);
+      this.extensions = fillSet(extensions);
+      this.mimeTypes = fillSet(mimeTypes);
     }
     
     public String toPackedString() {
@@ -441,22 +447,34 @@ public class DocumentFilter extends org.
       }
       
       // Mime types
-      String[] mimeTypes = new String[this.mimeTypes.size()];
-      i = 0;
-      for (String mimeType : this.mimeTypes) {
-        mimeTypes[i++] = mimeType;
+      if (this.mimeTypes == null)
+        sb.append('-');
+      else
+      {
+        sb.append('+');
+        String[] mimeTypes = new String[this.mimeTypes.size()];
+        i = 0;
+        for (String mimeType : this.mimeTypes) {
+          mimeTypes[i++] = mimeType;
+        }
+        java.util.Arrays.sort(mimeTypes);
+        packList(sb,mimeTypes,'+');
       }
-      java.util.Arrays.sort(mimeTypes);
-      packList(sb,mimeTypes,'+');
       
       // Extensions
-      String[] extensions = new String[this.extensions.size()];
-      i = 0;
-      for (String extension : this.extensions) {
-        extensions[i++] = extension;
+      if (this.extensions == null)
+        sb.append('-');
+      else
+      {
+        sb.append('+');
+        String[] extensions = new String[this.extensions.size()];
+        i = 0;
+        for (String extension : this.extensions) {
+          extensions[i++] = extension;
+        }
+        java.util.Arrays.sort(extensions);
+        packList(sb,extensions,'+');
       }
-      java.util.Arrays.sort(extensions);
-      packList(sb,extensions,'+');
 
       // Min length
       if (minLength == null)
@@ -485,6 +503,8 @@ public class DocumentFilter extends org.
     public boolean checkMimeType(String mimeType) {
       if (mimeType == null)
         mimeType = "application/unknown";
+      if (mimeTypes == null)
+        return true;
       return mimeTypes.contains(mimeType.toLowerCase(Locale.ROOT));
     }
     
@@ -502,6 +522,8 @@ public class DocumentFilter extends org.
       }
       if (extension == null || extension.length() == 0)
         extension = ".";
+      if (extensions == null)
+        return true;
       return extensions.contains(extension.toLowerCase(Locale.ROOT));
     }
     

Modified: manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java?rev=1630261&r1=1630260&r2=1630261&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java (original)
+++ manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java Thu Oct  9 00:27:55 2014
@@ -29,10 +29,11 @@ public class DocumentFilterConfig {
   public static final String NODE_MINLENGTH = "minlength";
   public static final String MINLENGTH_DEFAULT = "0";
   public static final String NODE_MAXLENGTH = "maxlength";
-  public static final String MAXLENGTH_DEFAULT = "16777216";
+  public static final String MAXLENGTH_DEFAULT = "1000000000";
   public static final String NODE_MIMETYPES = "mimetypes";
   public static final String MIMETYPES_DEFAULT =
-                        "application/msword\n"
+                        "*\n"
+                        + "application/msword\n"
 		        + "application/vnd.ms-excel\n"
 		        + "application/vnd.openxmlformats-officedocument.wordprocessingml.document\n"
 		        + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet\n"
@@ -48,7 +49,7 @@ public class DocumentFilterConfig {
 		        + "application/x-bittorrent";
   public static final String NODE_EXTENSIONS = "extensions";
   public static final String EXTENSIONS_DEFAULT =
-                    "doc\n" + "docx\n" + "xls\n" + "xlsx\n" + "ppt\n" + "pptx\n"
+                    "*\n" + "log\n" + "doc\n" + "docx\n" + "xls\n" + "xlsx\n" + "ppt\n" + "pptx\n"
 		    + "html\n" + "pdf\n" + "odt\n" + "ods\n" + "rtf\n" + "txt\n" + "mp3\n"
 		    + "mp4\n" + "wav\n" + "ogg\n" + "flac\n" + "torrent";
   public static final String ATTRIBUTE_VALUE = "value";