You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/07/05 15:21:49 UTC
svn commit: r1499996 - in /manifoldcf/trunk: ./
connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/
connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/
Author: kwright
Date: Fri Jul 5 13:21:49 2013
New Revision: 1499996
URL: http://svn.apache.org/r1499996
Log:
Fix for CONNECTORS-748.
Modified:
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java
manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1499996&r1=1499995&r2=1499996&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Fri Jul 5 13:21:49 2013
@@ -3,6 +3,11 @@ $Id$
======================= 1.3-dev =====================
+CONNECTORS-748: Fix broken regular expressions in file connector, and
+modify file output connector to deal with colons in the file name in an
+acceptable way (so it can be used on Windows too).
+(Minoru Osuka, Karl Wright)
+
CONNECTORS-729: Break up Jira URL into components, with proper
javascript checking.
(Karl Wright)
Modified: manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java?rev=1499996&r1=1499995&r2=1499996&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java (original)
+++ manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java Fri Jul 5 13:21:49 2013
@@ -23,6 +23,7 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.InterruptedIOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URI;
@@ -41,6 +42,7 @@ import org.apache.manifoldcf.agents.inte
import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
import org.apache.manifoldcf.agents.output.BaseOutputConnector;
+import org.apache.manifoldcf.agents.system.Logging;
import org.apache.manifoldcf.core.interfaces.ConfigParams;
import org.apache.manifoldcf.core.interfaces.ConfigurationNode;
import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
@@ -169,9 +171,6 @@ public class FileOutputConnector extends
FileOutputSpecs specs = null;
StringBuffer path = new StringBuffer();
- InputStream input = null;
- FileOutputStream output = null;
- FileLock lock = null;
try {
specs = new FileOutputSpecs(outputDescription);
@@ -186,6 +185,8 @@ public class FileOutputConnector extends
File file = new File(path.toString());
+ //System.out.println("File is '"+file+"'");
+
/*
* make directory
*/
@@ -200,80 +201,100 @@ public class FileOutputConnector extends
file.delete();
}
- input = document.getBinaryStream();
- output = new FileOutputStream(file);
-
+ FileOutputStream output = new FileOutputStream(file);
try {
/*
* lock file
*/
- boolean locked = false;
- int retryCount = 0;
FileChannel channel = output.getChannel();
- while(retryCount < 10) {
- lock = channel.tryLock();
- if (lock == null) {
- retryCount++;
- try {
- Thread.sleep(1000L);
- } catch(InterruptedException e) {
+ FileLock lock = channel.tryLock();
+ if (lock == null)
+ throw new ServiceInterruption("Could not lock file: '"+file+"'",null,1000L,-1L,10,false);
+
+ try {
+
+ /*
+ * write file
+ */
+ InputStream input = document.getBinaryStream();
+ byte buf[] = new byte[1024];
+ int len;
+ while((len = input.read(buf)) != -1) {
+ output.write(buf, 0, len);
+ }
+ output.flush();
+ } finally {
+ // Unlock
+ try {
+ if (lock != null) {
+ lock.release();
}
- } else {
- locked = true;
- break;
+ } catch (ClosedChannelException e) {
}
}
- if (!locked) {
- throw new ManifoldCFException("Lock failed.");
- }
-
- /*
- * write file
- */
- byte buf[] = new byte[1024];
- int len;
- while((len = input.read(buf)) != -1) {
- output.write(buf, 0, len);
- }
- output.flush();
} finally {
- /*
- * release file
- */
try {
- if (lock != null) {
- lock.release();
- }
- } catch (ClosedChannelException e) {
+ output.close();
+ } catch (IOException e) {
}
}
} catch (JSONException e) {
+ handleJSONException(e);
return DOCUMENTSTATUS_REJECTED;
} catch (URISyntaxException e) {
+ handleURISyntaxException(e);
return DOCUMENTSTATUS_REJECTED;
} catch (SecurityException e) {
+ handleSecurityException(e);
return DOCUMENTSTATUS_REJECTED;
} catch (FileNotFoundException e) {
+ handleFileNotFoundException(e);
return DOCUMENTSTATUS_REJECTED;
} catch (IOException e) {
+ handleIOException(e);
return DOCUMENTSTATUS_REJECTED;
- } catch (NullPointerException e) {
- return DOCUMENTSTATUS_REJECTED;
- } finally {
- try {
- input.close();
- } catch (IOException e) {
- }
- try {
- output.close();
- } catch (IOException e) {
- }
}
activities.recordActivity(null, INGEST_ACTIVITY, new Long(document.getBinaryLength()), documentURI, "OK", null);
return DOCUMENTSTATUS_ACCEPTED;
}
+ protected static void handleJSONException(JSONException e)
+ throws ManifoldCFException, ServiceInterruption {
+ Logging.agents.error("FileSystem: JSONException: "+e.getMessage(),e);
+ throw new ManifoldCFException(e.getMessage(),e);
+ }
+
+ protected static void handleURISyntaxException(URISyntaxException e)
+ throws ManifoldCFException, ServiceInterruption {
+ Logging.agents.error("FileSystem: URISyntaxException: "+e.getMessage(),e);
+ throw new ManifoldCFException(e.getMessage(),e);
+ }
+
+ protected static void handleSecurityException(SecurityException e)
+ throws ManifoldCFException, ServiceInterruption {
+ Logging.agents.error("FileSystem: SecurityException: "+e.getMessage(),e);
+ throw new ManifoldCFException(e.getMessage(),e);
+ }
+
+ protected static void handleFileNotFoundException(FileNotFoundException e)
+ throws ManifoldCFException, ServiceInterruption {
+ Logging.agents.error("FileSystem: Path is illegal: "+e.getMessage(),e);
+ throw new ManifoldCFException(e.getMessage(),e);
+ }
+
+ /** Handle IOException */
+ protected static void handleIOException(IOException e)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ if (!(e instanceof java.net.SocketTimeoutException) && (e instanceof InterruptedIOException)) {
+ throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED);
+ }
+ long currentTime = System.currentTimeMillis();
+ Logging.agents.warn("FileSystem: IO exception: "+e.getMessage(),e);
+ throw new ServiceInterruption("IO exception: "+e.getMessage(), e, currentTime + 300000L, currentTime + 3 * 60 * 60000L,-1,false);
+ }
+
/** Remove a document using the connector.
* Note that the last outputDescription is included, since it may be necessary for the connector to use such information to know how to properly remove the document.
*@param documentURI is the URI of the document. The URI is presumed to be the unique identifier which the output data store will use to process
@@ -515,21 +536,21 @@ public class FileOutputConnector extends
for (String name : uri.getRawPath().split("/")) {
if (name.length() > 0) {
path.append("/");
- path.append(name);
+ path.append(convertString(name));
}
}
}
}
if (uri.getRawQuery() != null) {
path.append("?");
- path.append(uri.getRawQuery());
+ path.append(convertString(uri.getRawQuery()));
}
} else {
if (uri.getRawSchemeSpecificPart() != null) {
for (String name : uri.getRawSchemeSpecificPart().split("/")) {
if (name.length() > 0) {
path.append("/");
- path.append(name);
+ path.append(convertString(name));
}
}
}
@@ -540,4 +561,18 @@ public class FileOutputConnector extends
}
return path.toString();
}
+
+ final private String convertString(final String input) {
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < input.length(); i++) {
+ char c = input.charAt(i);
+ // Handle filename disallowed special characters!
+ if (c == ':') {
+ // MHL for what really happens to colons
+ }
+ else
+ sb.append(c);
+ }
+ return sb.toString();
+ }
}
Modified: manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java?rev=1499996&r1=1499995&r2=1499996&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java (original)
+++ manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java Fri Jul 5 13:21:49 2013
@@ -410,13 +410,13 @@ public class FileConnector extends org.a
*/
protected static String findConvertPath(DocumentSpecification spec, File theFile)
{
- String fullpath = theFile.getAbsolutePath().replaceAll("\\","/");
+ String fullpath = theFile.getAbsolutePath().replaceAll("\\\\","/");
for (int j = 0; j < spec.getChildCount(); j++)
{
SpecificationNode sn = spec.getChild(j);
if (sn.getType().equals("startpoint"))
{
- String path = sn.getAttributeValue("path").replaceAll("\\","/");
+ String path = sn.getAttributeValue("path").replaceAll("\\\\","/");
String convertToURI = sn.getAttributeValue("converttouri");
if (path.length() > 0 && convertToURI != null && convertToURI.equals("true"))
{