You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/05/08 09:51:51 UTC
svn commit: r1593199 -
/manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
Author: kwright
Date: Thu May 8 07:51:50 2014
New Revision: 1593199
URL: http://svn.apache.org/r1593199
Log:
More work on SpecPacker
Modified:
manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
Modified: manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java?rev=1593199&r1=1593198&r2=1593199&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java Thu May 8 07:51:50 2014
@@ -20,7 +20,9 @@ package org.apache.manifoldcf.agents.out
import java.io.IOException;
import java.io.InputStream;
-import java.io.InterruptedIOException;
+import java.io.InterruptedIOException;
+import java.io.StringReader;
+import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
@@ -923,6 +925,26 @@ public class AmazonCloudSearchConnector
}
+ /** Populate a set from a multi-line string, one entry per line.
+ * Every line is trimmed and added to the set unless it is empty after trimming.
+ *@param set is the set that receives the trimmed, non-empty lines.
+ *@param input is the line-separated text to split; may be null, in which case
+ * the set is left unchanged.
+ */
+ protected static void fillSet(Set<String> set, String input) {
+   // The SpecPacker(OutputSpecification) constructor passes null here when the
+   // specification carries no mimetypes/extensions node, and
+   // new StringReader(null) would fail with a NullPointerException.
+   if (input == null)
+     return;
+   try
+   {
+     StringReader sr = new StringReader(input);
+     BufferedReader br = new BufferedReader(sr);
+     String line = null;
+     while ((line = br.readLine()) != null)
+     {
+       line = line.trim();
+       if (line.length() > 0)
+         set.add(line);
+     }
+   }
+   catch (IOException e)
+   {
+     // Should never happen: the reader is backed by an in-memory string
+     throw new RuntimeException("IO exception reading strings: "+e.getMessage(),e);
+   }
+ }
+
protected static class SpecPacker {
private final Map<String,String> sourceTargets = new HashMap<String,String>();
@@ -933,6 +955,9 @@ public class AmazonCloudSearchConnector
public SpecPacker(OutputSpecification os) {
boolean keepAllMetadata = true;
+ Long lengthCutoff = null;
+ String extensions = null;
+ String mimeTypes = null;
for (int i = 0; i < os.getChildCount(); i++) {
SpecificationNode sn = os.getChild(i);
@@ -947,11 +972,19 @@ public class AmazonCloudSearchConnector
target = "";
}
sourceTargets.put(source, target);
+ } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MIMETYPES)) {
+ mimeTypes = sn.getValue();
+ } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_EXTENSIONS)) {
+ extensions = sn.getValue();
+ } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MAXLENGTH)) {
+ String value = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
+ lengthCutoff = new Long(value);
}
}
this.keepAllMetadata = keepAllMetadata;
- // MHL for mimetypes and extensions and length
- this.lengthCutoff = null;
+ this.lengthCutoff = lengthCutoff;
+ fillSet(this.extensions, extensions);
+ fillSet(this.mimeTypes, mimeTypes);
}
public SpecPacker(String packedString) {
@@ -993,11 +1026,15 @@ public class AmazonCloudSearchConnector
}
+ /** Check whether a mime type is present in this packer's mimeTypes set.
+ * A null mime type is looked up under the placeholder "application/unknown".
+ *@param mimeType is the mime type to look up; may be null.
+ *@return true if the mimeTypes set contains the (possibly substituted) value.
+ */
public boolean checkMimeType(String mimeType) {
+ if (mimeType == null)
+ mimeType = "application/unknown";
return mimeTypes.contains(mimeType);
}
+ /** Check whether a URL's extension is present in this packer's extensions set.
+ * The extension is obtained from FilenameUtils.getExtension(url); a null or
+ * empty result is looked up under the placeholder ".".
+ * NOTE(review): the FilenameUtils import is not visible in this excerpt --
+ * presumably Apache Commons IO; confirm.
+ *@param url is the URL whose extension is checked.
+ *@return true if the extensions set contains the (possibly substituted) extension.
+ */
public boolean checkURLIndexable(String url) {
String extension = FilenameUtils.getExtension(url);
+ if (extension == null || extension.length() == 0)
+ extension = ".";
return extensions.contains(extension);
}