You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/05/08 09:51:51 UTC

svn commit: r1593199 - /manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java

Author: kwright
Date: Thu May  8 07:51:50 2014
New Revision: 1593199

URL: http://svn.apache.org/r1593199
Log:
More work on SpecPacker

Modified:
    manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java

Modified: manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java?rev=1593199&r1=1593198&r2=1593199&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java Thu May  8 07:51:50 2014
@@ -20,7 +20,9 @@ package org.apache.manifoldcf.agents.out
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.InterruptedIOException;
+import java.io.InterruptedIOException;
+import java.io.StringReader;
+import java.io.BufferedReader;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
@@ -923,6 +925,26 @@ public class AmazonCloudSearchConnector 
     
   }
   
+  protected static void fillSet(Set<String> set, String input) {
+    try
+    {
+      StringReader sr = new StringReader(input);
+      BufferedReader br = new BufferedReader(sr);
+      String line = null;
+      while ((line = br.readLine()) != null)
+      {
+        line = line.trim();
+        if (line.length() > 0)
+          set.add(line);
+      }
+    }
+    catch (IOException e)
+    {
+      // Should never happen
+      throw new RuntimeException("IO exception reading strings: "+e.getMessage(),e);
+    }
+  }
+  
   protected static class SpecPacker {
     
     private final Map<String,String> sourceTargets = new HashMap<String,String>();
@@ -933,6 +955,9 @@ public class AmazonCloudSearchConnector 
     
     public SpecPacker(OutputSpecification os) {
       boolean keepAllMetadata = true;
+      Long lengthCutoff = null;
+      String extensions = null;
+      String mimeTypes = null;
       for (int i = 0; i < os.getChildCount(); i++) {
         SpecificationNode sn = os.getChild(i);
         
@@ -947,11 +972,19 @@ public class AmazonCloudSearchConnector 
             target = "";
           }
           sourceTargets.put(source, target);
+        } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MIMETYPES)) {
+          mimeTypes = sn.getValue();
+        } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_EXTENSIONS)) {
+          extensions = sn.getValue();
+        } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MAXLENGTH)) {
+          String value = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
+          lengthCutoff = new Long(value);
         }
       }
       this.keepAllMetadata = keepAllMetadata;
-      // MHL for mimetypes and extensions and length
-      this.lengthCutoff = null;
+      this.lengthCutoff = lengthCutoff;
+      fillSet(this.extensions, extensions);
+      fillSet(this.mimeTypes, mimeTypes);
     }
     
     public SpecPacker(String packedString) {
@@ -993,11 +1026,15 @@ public class AmazonCloudSearchConnector 
     }
     
     public boolean checkMimeType(String mimeType) {
+      if (mimeType == null)
+        mimeType = "application/unknown";
       return mimeTypes.contains(mimeType);
     }
     
     public boolean checkURLIndexable(String url) {
       String extension = FilenameUtils.getExtension(url);
+      if (extension == null || extension.length() == 0)
+        extension = ".";
       return extensions.contains(extension);
     }