You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/21 13:35:14 UTC
svn commit: r1633345 - in /manifoldcf/trunk/connectors:
alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/
alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/
cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/
Author: kwright
Date: Tue Oct 21 11:35:13 2014
New Revision: 1633345
URL: http://svn.apache.org/r1633345
Log:
CONNECTORS-1077 work for alfresco-webscript connector
Modified:
manifoldcf/trunk/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java
manifoldcf/trunk/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java
manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
Modified: manifoldcf/trunk/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java?rev=1633345&r1=1633344&r2=1633345&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java (original)
+++ manifoldcf/trunk/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java Tue Oct 21 11:35:13 2014
@@ -41,8 +41,10 @@ import java.util.*;
public class AlfrescoConnector extends BaseRepositoryConnector {
+
private static final String ACTIVITY_FETCH = "fetch document";
private static final String[] activitiesList = new String[]{ACTIVITY_FETCH};
+
private AlfrescoClient alfrescoClient;
private static final String CONTENT_URL_PROPERTY = "contentUrlPath";
@@ -197,6 +199,12 @@ public class AlfrescoConnector extends B
boolean enableDocumentProcessing = ConfigurationHandler.getEnableDocumentProcessing(spec);
for (String doc : documentIdentifiers) {
+
+ String errorCode = null;
+ String errorDesc = null;
+ Long fileLengthLong = null;
+ long startTime = System.currentTimeMillis();
+
try {
String nextVersion = statuses.getIndexedVersionString(doc);
@@ -269,19 +277,40 @@ public class AlfrescoConnector extends B
if (lSize != null && !activities.checkLengthIndexable(lSize.longValue())) {
activities.noDocument(doc, documentVersion);
+ errorCode = activities.EXCLUDED_LENGTH;
+ errorDesc = "Excluding document because of length ("+lSize+")";
continue;
}
if (!activities.checkMimeTypeIndexable(mimeType)) {
activities.noDocument(doc, documentVersion);
+ errorCode = activities.EXCLUDED_MIMETYPE;
+ errorDesc = "Excluding document because of mime type ("+mimeType+")";
continue;
}
if (!activities.checkDateIndexable(modifiedDate)) {
activities.noDocument(doc, documentVersion);
+ errorCode = activities.EXCLUDED_DATE;
+ errorDesc = "Excluding document because of date ("+modifiedDate+")";
+ continue;
+ }
+
+ String contentUrlPath = (String) properties.get(CONTENT_URL_PROPERTY);
+ if (contentUrlPath == null || contentUrlPath.isEmpty()) {
+ activities.noDocument(doc, documentVersion);
+ errorCode = "NOURL";
+ errorDesc = "Excluding document because no URL found";
continue;
}
+ if (!activities.checkURLIndexable(contentUrlPath)) {
+ activities.noDocument(doc, documentVersion);
+ errorCode = activities.EXCLUDED_URL;
+ errorDesc = "Excluding document because of URL ('"+contentUrlPath+"')";
+ continue;
+ }
+
RepositoryDocument rd = new RepositoryDocument();
rd.addField(FIELD_NODEREF, nodeRef);
rd.addField(FIELD_TYPE, type);
@@ -313,17 +342,14 @@ public class AlfrescoConnector extends B
InputStream stream;
long length;
byte[] empty = new byte[0];
- String contentUrlPath = (String) properties.get(CONTENT_URL_PROPERTY);
- if (contentUrlPath == null || contentUrlPath.isEmpty()) {
- activities.noDocument(doc, documentVersion);
- continue;
- }
if (enableDocumentProcessing) {
if (lSize != null) {
stream = alfrescoClient.fetchContent(contentUrlPath);
if (stream == null) {
activities.noDocument(doc, documentVersion);
+ errorCode = "NOSTREAM";
+ errorDesc = "Excluding document because no content stream found";
continue;
}
length = lSize.longValue();
@@ -341,6 +367,8 @@ public class AlfrescoConnector extends B
if (Logging.connectors != null && Logging.connectors.isDebugEnabled())
Logging.connectors.debug(MessageFormat.format("Ingesting with id: {0}, URI {1} and rd {2}", new Object[]{uuid, nodeRef, rd.getFileName()}));
activities.ingestDocumentWithException(doc, documentVersion, contentUrlPath, rd);
+ errorCode = "OK";
+ fileLengthLong = new Long(length);
} catch (IOException e) {
handleIOException(e,"reading stream");
} finally {
@@ -353,6 +381,14 @@ public class AlfrescoConnector extends B
} catch (AlfrescoDownException e) {
handleAlfrescoDownException(e,"processing");
+ } catch (ManifoldCFException e) {
+ if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+ errorCode = null;
+ throw e;
+ } finally {
+ if (errorCode != null)
+ activities.recordActivity(new Long(startTime), ACTIVITY_FETCH,
+ fileLengthLong, doc, errorCode, errorDesc, null);
}
}
}
Modified: manifoldcf/trunk/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java?rev=1633345&r1=1633344&r2=1633345&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java (original)
+++ manifoldcf/trunk/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java Tue Oct 21 11:35:13 2014
@@ -913,12 +913,8 @@ public class AlfrescoRepositoryConnector
}
}catch(IOException e){
- if (e instanceof java.io.InterruptedIOException && !(e instanceof java.net.SocketTimeoutException))
- errorCode = null;
- else {
- errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
- errorDesc = e.getMessage();
- }
+ errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+ errorDesc = e.getMessage();
Logging.connectors.warn(
"Alfresco: IOException finding children: "
+ e.getMessage(), e);
@@ -971,12 +967,8 @@ public class AlfrescoRepositoryConnector
+ e.getMessage(), e);
handleParseException(e);
} catch (IOException e) {
- if (e instanceof java.io.InterruptedIOException && !(e instanceof java.net.SocketTimeoutException))
- errorCode = null;
- else {
- errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
- errorDesc = e.getMessage();
- }
+ errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+ errorDesc = e.getMessage();
Logging.connectors.warn(
"Alfresco: IOException: "
+ e.getMessage(), e);
@@ -1002,6 +994,10 @@ public class AlfrescoRepositoryConnector
}
}
+ } catch (ManifoldCFException e) {
+ if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+ errorCode = null;
+ throw e;
} finally {
if (errorCode != null)
activities.recordActivity(new Long(startTime), ACTIVITY_READ,
Modified: manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java?rev=1633345&r1=1633344&r2=1633345&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java (original)
+++ manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java Tue Oct 21 11:35:13 2014
@@ -1326,10 +1326,7 @@ public class CmisRepositoryConnector ext
fileLengthLong = new Long(fileLength);
errorCode = "OK";
} catch (IOException e) {
- if (e instanceof InterruptedIOException && !(e instanceof java.net.SocketTimeoutException))
- errorCode = null;
- else
- errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+ errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
errorDesc = e.getMessage();
handleIOException(e, "reading file input stream");
}
@@ -1339,10 +1336,7 @@ public class CmisRepositoryConnector ext
is.close();
}
} catch (IOException e) {
- if (e instanceof InterruptedIOException && !(e instanceof java.net.SocketTimeoutException))
- errorCode = null;
- else
- errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+ errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
errorDesc = e.getMessage();
handleIOException(e, "closing file input stream");
}
@@ -1353,6 +1347,10 @@ public class CmisRepositoryConnector ext
errorCode = "UNKNOWNTYPE";
errorDesc = "Document type is unrecognized: '"+baseTypeId+"'";
}
+ } catch (ManifoldCFException e) {
+ if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+ errorCode = null;
+ throw e;
} finally {
if (errorCode != null)
activities.recordActivity(new Long(startTime), ACTIVITY_READ,