You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/09 02:27:55 UTC
svn commit: r1630261 - in
/manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter:
DocumentFilter.java DocumentFilterConfig.java
Author: kwright
Date: Thu Oct 9 00:27:55 2014
New Revision: 1630261
URL: http://svn.apache.org/r1630261
Log:
Add wildcards for extension and mimetype filtering
Modified:
manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java
manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java
Modified: manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java?rev=1630261&r1=1630260&r2=1630261&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java (original)
+++ manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java Thu Oct 9 00:27:55 2014
@@ -375,7 +375,8 @@ public class DocumentFilter extends org.
}
- protected static void fillSet(Set<String> set, String input) {
+ protected static Set<String> fillSet(String input) {
+ Set<String> rval = new HashSet<String>();
try
{
StringReader sr = new StringReader(input);
@@ -384,8 +385,10 @@ public class DocumentFilter extends org.
while ((line = br.readLine()) != null)
{
line = line.trim();
- if (line.length() > 0)
- set.add(line.toLowerCase(Locale.ROOT));
+ if (line.equals("*"))
+ rval = null;
+ else if (rval != null && line.length() > 0)
+ rval.add(line.toLowerCase(Locale.ROOT));
}
}
catch (IOException e)
@@ -393,12 +396,15 @@ public class DocumentFilter extends org.
// Should never happen
throw new RuntimeException("IO exception reading strings: "+e.getMessage(),e);
}
+ return rval;
}
protected static class SpecPacker {
- private final Set<String> extensions = new HashSet<String>();
- private final Set<String> mimeTypes = new HashSet<String>();
+ // null means "match everything"
+ private final Set<String> extensions;
+ // null means "match everything"
+ private final Set<String> mimeTypes;
private final Long minLength;
private final Long lengthCutoff;
@@ -424,8 +430,8 @@ public class DocumentFilter extends org.
}
this.minLength = minLength;
this.lengthCutoff = lengthCutoff;
- fillSet(this.extensions, extensions);
- fillSet(this.mimeTypes, mimeTypes);
+ this.extensions = fillSet(extensions);
+ this.mimeTypes = fillSet(mimeTypes);
}
public String toPackedString() {
@@ -441,22 +447,34 @@ public class DocumentFilter extends org.
}
// Mime types
- String[] mimeTypes = new String[this.mimeTypes.size()];
- i = 0;
- for (String mimeType : this.mimeTypes) {
- mimeTypes[i++] = mimeType;
+ if (this.mimeTypes == null)
+ sb.append('-');
+ else
+ {
+ sb.append('+');
+ String[] mimeTypes = new String[this.mimeTypes.size()];
+ i = 0;
+ for (String mimeType : this.mimeTypes) {
+ mimeTypes[i++] = mimeType;
+ }
+ java.util.Arrays.sort(mimeTypes);
+ packList(sb,mimeTypes,'+');
}
- java.util.Arrays.sort(mimeTypes);
- packList(sb,mimeTypes,'+');
// Extensions
- String[] extensions = new String[this.extensions.size()];
- i = 0;
- for (String extension : this.extensions) {
- extensions[i++] = extension;
+ if (this.extensions == null)
+ sb.append('-');
+ else
+ {
+ sb.append('+');
+ String[] extensions = new String[this.extensions.size()];
+ i = 0;
+ for (String extension : this.extensions) {
+ extensions[i++] = extension;
+ }
+ java.util.Arrays.sort(extensions);
+ packList(sb,extensions,'+');
}
- java.util.Arrays.sort(extensions);
- packList(sb,extensions,'+');
// Min length
if (minLength == null)
@@ -485,6 +503,8 @@ public class DocumentFilter extends org.
public boolean checkMimeType(String mimeType) {
if (mimeType == null)
mimeType = "application/unknown";
+ if (mimeTypes == null)
+ return true;
return mimeTypes.contains(mimeType.toLowerCase(Locale.ROOT));
}
@@ -502,6 +522,8 @@ public class DocumentFilter extends org.
}
if (extension == null || extension.length() == 0)
extension = ".";
+ if (extensions == null)
+ return true;
return extensions.contains(extension.toLowerCase(Locale.ROOT));
}
Modified: manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java?rev=1630261&r1=1630260&r2=1630261&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java (original)
+++ manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java Thu Oct 9 00:27:55 2014
@@ -29,10 +29,11 @@ public class DocumentFilterConfig {
public static final String NODE_MINLENGTH = "minlength";
public static final String MINLENGTH_DEFAULT = "0";
public static final String NODE_MAXLENGTH = "maxlength";
- public static final String MAXLENGTH_DEFAULT = "16777216";
+ public static final String MAXLENGTH_DEFAULT = "1000000000";
public static final String NODE_MIMETYPES = "mimetypes";
public static final String MIMETYPES_DEFAULT =
- "application/msword\n"
+ "*\n"
+ + "application/msword\n"
+ "application/vnd.ms-excel\n"
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document\n"
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet\n"
@@ -48,7 +49,7 @@ public class DocumentFilterConfig {
+ "application/x-bittorrent";
public static final String NODE_EXTENSIONS = "extensions";
public static final String EXTENSIONS_DEFAULT =
- "doc\n" + "docx\n" + "xls\n" + "xlsx\n" + "ppt\n" + "pptx\n"
+ "*\n" + "log\n" + "doc\n" + "docx\n" + "xls\n" + "xlsx\n" + "ppt\n" + "pptx\n"
+ "html\n" + "pdf\n" + "odt\n" + "ods\n" + "rtf\n" + "txt\n" + "mp3\n"
+ "mp4\n" + "wav\n" + "ogg\n" + "flac\n" + "torrent";
public static final String ATTRIBUTE_VALUE = "value";