You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/07/05 15:21:49 UTC

svn commit: r1499996 - in /manifoldcf/trunk: ./ connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/ connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/

Author: kwright
Date: Fri Jul  5 13:21:49 2013
New Revision: 1499996

URL: http://svn.apache.org/r1499996
Log:
Fix for CONNECTORS-748.

Modified:
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java
    manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1499996&r1=1499995&r2=1499996&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Fri Jul  5 13:21:49 2013
@@ -3,6 +3,11 @@ $Id$
 
 ======================= 1.3-dev =====================
 
+CONNECTORS-748: Fix broken regular expressions in file connector, and
+modify file output connector to deal with colons in the file name in an
+acceptable way (so it can be used on Windows too).
+(Minoru Osuka, Karl Wright)
+
 CONNECTORS-729: Break up Jira URL into components, with proper
 javascript checking.
 (Karl Wright)

Modified: manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java?rev=1499996&r1=1499995&r2=1499996&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java (original)
+++ manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java Fri Jul  5 13:21:49 2013
@@ -23,6 +23,7 @@ import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.InterruptedIOException;
 import java.io.InputStream;
 import java.io.UnsupportedEncodingException;
 import java.net.URI;
@@ -41,6 +42,7 @@ import org.apache.manifoldcf.agents.inte
 import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
 import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
 import org.apache.manifoldcf.agents.output.BaseOutputConnector;
+import org.apache.manifoldcf.agents.system.Logging;
 import org.apache.manifoldcf.core.interfaces.ConfigParams;
 import org.apache.manifoldcf.core.interfaces.ConfigurationNode;
 import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
@@ -169,9 +171,6 @@ public class FileOutputConnector extends
 
     FileOutputSpecs specs = null;
     StringBuffer path = new StringBuffer();
-    InputStream input = null;
-    FileOutputStream output = null;
-    FileLock lock = null;
     try {
       specs = new FileOutputSpecs(outputDescription);
 
@@ -186,6 +185,8 @@ public class FileOutputConnector extends
 
       File file = new File(path.toString());
 
+      //System.out.println("File is '"+file+"'");
+
       /*
        * make directory
        */
@@ -200,80 +201,100 @@ public class FileOutputConnector extends
         file.delete();
       }
 
-      input = document.getBinaryStream();
-      output = new FileOutputStream(file);
-
+      FileOutputStream output = new FileOutputStream(file);
       try {
         /*
          * lock file
          */
-        boolean locked = false;
-        int retryCount = 0;
         FileChannel channel = output.getChannel();
-        while(retryCount < 10) {
-          lock = channel.tryLock();
-          if (lock == null) {
-            retryCount++;
-            try {
-              Thread.sleep(1000L);
-            } catch(InterruptedException e) {
+        FileLock lock = channel.tryLock();
+        if (lock == null)
+          throw new ServiceInterruption("Could not lock file: '"+file+"'",null,1000L,-1L,10,false);
+
+        try {
+
+          /*
+           * write file
+           */
+          InputStream input = document.getBinaryStream();
+          byte buf[] = new byte[1024];
+          int len;
+          while((len = input.read(buf)) != -1) {
+            output.write(buf, 0, len);
+          }
+          output.flush();
+        } finally {
+          // Unlock
+          try {
+            if (lock != null) {
+              lock.release();
             }
-          } else {
-            locked = true;
-            break;
+          } catch (ClosedChannelException e) {
           }
         }
-        if (!locked) {
-          throw new ManifoldCFException("Lock failed.");
-        }
-
-        /*
-         * write file
-         */
-        byte buf[] = new byte[1024];
-        int len;
-        while((len = input.read(buf)) != -1) {
-          output.write(buf, 0, len);
-        }
-        output.flush();
       } finally {
-        /*
-         * release file
-         */
         try {
-          if (lock != null) {
-            lock.release();
-          }
-        } catch (ClosedChannelException e) {
+          output.close();
+        } catch (IOException e) {
         }
       }
     } catch (JSONException e) {
+      handleJSONException(e);
       return DOCUMENTSTATUS_REJECTED;
     } catch (URISyntaxException e) {
+      handleURISyntaxException(e);
       return DOCUMENTSTATUS_REJECTED;
     } catch (SecurityException e) {
+      handleSecurityException(e);
       return DOCUMENTSTATUS_REJECTED;
     } catch (FileNotFoundException e) {
+      handleFileNotFoundException(e);
       return DOCUMENTSTATUS_REJECTED;
     } catch (IOException e) {
+      handleIOException(e);
       return DOCUMENTSTATUS_REJECTED;
-    } catch (NullPointerException e) {
-      return DOCUMENTSTATUS_REJECTED;
-    } finally {
-      try {
-        input.close();
-      } catch (IOException e) {
-      }
-      try {
-        output.close();
-      } catch (IOException e) {
-      }
     }
 
     activities.recordActivity(null, INGEST_ACTIVITY, new Long(document.getBinaryLength()), documentURI, "OK", null);
     return DOCUMENTSTATUS_ACCEPTED;
   }
 
+  protected static void handleJSONException(JSONException e)
+    throws ManifoldCFException, ServiceInterruption {
+    Logging.agents.error("FileSystem: JSONException: "+e.getMessage(),e);
+    throw new ManifoldCFException(e.getMessage(),e);
+  }
+
+  protected static void handleURISyntaxException(URISyntaxException e)
+    throws ManifoldCFException, ServiceInterruption {
+    Logging.agents.error("FileSystem: URISyntaxException: "+e.getMessage(),e);
+    throw new ManifoldCFException(e.getMessage(),e);
+  }
+
+  protected static void handleSecurityException(SecurityException e)
+    throws ManifoldCFException, ServiceInterruption {
+    Logging.agents.error("FileSystem: SecurityException: "+e.getMessage(),e);
+    throw new ManifoldCFException(e.getMessage(),e);
+  }
+
+  protected static void handleFileNotFoundException(FileNotFoundException e)
+    throws ManifoldCFException, ServiceInterruption {
+    Logging.agents.error("FileSystem: Path is illegal: "+e.getMessage(),e);
+    throw new ManifoldCFException(e.getMessage(),e);
+  }
+
+  /** Handle IOException */
+  protected static void handleIOException(IOException e)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    if (!(e instanceof java.net.SocketTimeoutException) && (e instanceof InterruptedIOException)) {
+      throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED);
+    }
+    long currentTime = System.currentTimeMillis();
+    Logging.agents.warn("FileSystem: IO exception: "+e.getMessage(),e);
+    throw new ServiceInterruption("IO exception: "+e.getMessage(), e, currentTime + 300000L, currentTime + 3 * 60 * 60000L,-1,false);
+  }
+
   /** Remove a document using the connector.
    * Note that the last outputDescription is included, since it may be necessary for the connector to use such information to know how to properly remove the document.
    *@param documentURI is the URI of the document.  The URI is presumed to be the unique identifier which the output data store will use to process
@@ -515,21 +536,21 @@ public class FileOutputConnector extends
           for (String name : uri.getRawPath().split("/")) {
             if (name.length() > 0) {
               path.append("/");
-              path.append(name);
+              path.append(convertString(name));
             }
           }
         }
       }
       if (uri.getRawQuery() != null) {
         path.append("?");
-        path.append(uri.getRawQuery());
+        path.append(convertString(uri.getRawQuery()));
       }
     } else {
       if (uri.getRawSchemeSpecificPart() != null) {
         for (String name : uri.getRawSchemeSpecificPart().split("/")) {
           if (name.length() > 0) {
             path.append("/");
-            path.append(name);
+            path.append(convertString(name));
           }
         }
       }
@@ -540,4 +561,18 @@ public class FileOutputConnector extends
     }
     return path.toString();
   }
+  
+  final private String convertString(final String input) {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < input.length(); i++) {
+      char c = input.charAt(i);
+      // Handle filename disallowed special characters!
+      if (c == ':') {
+        // MHL for what really happens to colons
+      }
+      else
+        sb.append(c);
+    }
+    return sb.toString();
+  }
 }

Modified: manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java?rev=1499996&r1=1499995&r2=1499996&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java (original)
+++ manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java Fri Jul  5 13:21:49 2013
@@ -410,13 +410,13 @@ public class FileConnector extends org.a
   */
   protected static String findConvertPath(DocumentSpecification spec, File theFile)
   {
-    String fullpath = theFile.getAbsolutePath().replaceAll("\\","/");
+    String fullpath = theFile.getAbsolutePath().replaceAll("\\\\","/");
     for (int j = 0; j < spec.getChildCount(); j++)
     {
       SpecificationNode sn = spec.getChild(j);
       if (sn.getType().equals("startpoint"))
       {
-        String path = sn.getAttributeValue("path").replaceAll("\\","/");
+        String path = sn.getAttributeValue("path").replaceAll("\\\\","/");
         String convertToURI = sn.getAttributeValue("converttouri");
         if (path.length() > 0 && convertToURI != null && convertToURI.equals("true"))
         {