You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/20 20:05:19 UTC
svn commit: r1633193 - in /manifoldcf/trunk:
connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/
framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/
Author: kwright
Date: Mon Oct 20 18:05:19 2014
New Revision: 1633193
URL: http://svn.apache.org/r1633193
Log:
Fix CONNECTORS-1077 for Alfresco connector.
Modified:
manifoldcf/trunk/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IHistoryActivity.java
Modified: manifoldcf/trunk/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java?rev=1633193&r1=1633192&r2=1633193&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java (original)
+++ manifoldcf/trunk/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java Mon Oct 20 18:05:19 2014
@@ -890,106 +890,122 @@ public class AlfrescoRepositoryConnector
String errorCode = "OK";
String errorDesc = StringUtils.EMPTY;
+ Long fileLengthLong = null;
+
long startTime = System.currentTimeMillis();
try{
- boolean isFolder = ContentModelUtils.isFolder(endpoint, username, password, socketTimeout, session, reference);
-
- //a generic node in Alfresco could have child-associations
- if (isFolder) {
- // queue all the children of the folder
- QueryResult queryResult = SearchUtils.getChildren(endpoint, username, password, socketTimeout, session, reference);
- ResultSet resultSet = queryResult.getResultSet();
- ResultSetRow[] resultSetRows = resultSet.getRows();
- for (ResultSetRow resultSetRow : resultSetRows) {
- NamedValue[] childProperties = resultSetRow.getColumns();
- String childNodeReference = PropertiesUtils.getNodeReference(childProperties);
- activities.addDocumentReference(childNodeReference, nodeReference, RELATIONSHIP_CHILD);
- }
- }
-
- }catch(IOException e){
- Logging.connectors.warn(
- "Alfresco: IOException finding children: "
- + e.getMessage(), e);
- handleIOException(e);
- }
-
- //a generic node in Alfresco could also have binaries content
- if (isDocument) {
- // this is a content to ingest
- InputStream is = null;
- long fileLength = 0;
- try {
- //properties ingestion
- RepositoryDocument rd = new RepositoryDocument();
- List<NamedValue> contentProperties = PropertiesUtils.getContentProperties(properties);
- PropertiesUtils.ingestProperties(rd, properties, contentProperties);
-
- // binaries ingestion - in Alfresco we could have more than one binary for each node (custom content models)
- for (NamedValue contentProperty : contentProperties) {
- //we are ingesting all the binaries defined as d:content property in the Alfresco content model
- Content binary = ContentReader.read(endpoint, username, password, socketTimeout, session, predicate, contentProperty.getName());
- fileLength = binary.getLength();
- is = ContentReader.getBinary(endpoint, binary, username, password, socketTimeout, session);
- rd.setBinary(is, fileLength);
-
- //id is the node reference only if the node has an unique content stream
- //For a node with a single d:content property: id = node reference
- String id = PropertiesUtils.getNodeReference(properties);
-
- //For a node with multiple d:content properties: id = node reference;QName
- //The QName of a property of type d:content will be appended to the node reference
- if(contentProperties.size()>1){
- id = id + INGESTION_SEPARATOR_FOR_MULTI_BINARY + contentProperty.getName();
+ try{
+ boolean isFolder = ContentModelUtils.isFolder(endpoint, username, password, socketTimeout, session, reference);
+
+ //a generic node in Alfresco could have child-associations
+ if (isFolder) {
+ // queue all the children of the folder
+ QueryResult queryResult = SearchUtils.getChildren(endpoint, username, password, socketTimeout, session, reference);
+ ResultSet resultSet = queryResult.getResultSet();
+ ResultSetRow[] resultSetRows = resultSet.getRows();
+ for (ResultSetRow resultSetRow : resultSetRows) {
+ NamedValue[] childProperties = resultSetRow.getColumns();
+ String childNodeReference = PropertiesUtils.getNodeReference(childProperties);
+ activities.addDocumentReference(childNodeReference, nodeReference, RELATIONSHIP_CHILD);
}
-
- //the document uri is related to the specific d:content property available in the node
- //we want to ingest each content stream that are nested in a single node
- String documentURI = binary.getUrl();
- activities.ingestDocumentWithException(documentIdentifier, id, versionString, documentURI, rd);
+ }
+
+ }catch(IOException e){
+ if (e instanceof java.io.InterruptedIOException && !(e instanceof java.net.SocketTimeoutException))
+ errorCode = null;
+ else {
+ errorCode = activities.IOEXCEPTION;
+ errorDesc = e.getMessage();
}
-
- AuthenticationUtils.endSession();
-
- } catch (ParseException e) {
- errorCode = "IO ERROR";
- errorDesc = e.getMessage();
- Logging.connectors.warn(
- "Alfresco: Error during the reading process of dates: "
- + e.getMessage(), e);
- handleParseException(e);
- } catch (IOException e) {
Logging.connectors.warn(
- "Alfresco: IOException: "
+ "Alfresco: IOException finding children: "
+ e.getMessage(), e);
handleIOException(e);
- } finally {
+ }
+
+ //a generic node in Alfresco could also have binaries content
+ if (isDocument) {
+ // this is a content to ingest
+ InputStream is = null;
+ long fileLength = 0;
try {
- if(is!=null){
- is.close();
+ //properties ingestion
+ RepositoryDocument rd = new RepositoryDocument();
+ List<NamedValue> contentProperties = PropertiesUtils.getContentProperties(properties);
+ PropertiesUtils.ingestProperties(rd, properties, contentProperties);
+
+ // binaries ingestion - in Alfresco we could have more than one binary for each node (custom content models)
+ for (NamedValue contentProperty : contentProperties) {
+ //we are ingesting all the binaries defined as d:content property in the Alfresco content model
+ Content binary = ContentReader.read(endpoint, username, password, socketTimeout, session, predicate, contentProperty.getName());
+ fileLength = binary.getLength();
+ is = ContentReader.getBinary(endpoint, binary, username, password, socketTimeout, session);
+ rd.setBinary(is, fileLength);
+
+ //id is the node reference only if the node has an unique content stream
+ //For a node with a single d:content property: id = node reference
+ String id = PropertiesUtils.getNodeReference(properties);
+
+ //For a node with multiple d:content properties: id = node reference;QName
+ //The QName of a property of type d:content will be appended to the node reference
+ if(contentProperties.size()>1){
+ id = id + INGESTION_SEPARATOR_FOR_MULTI_BINARY + contentProperty.getName();
+ }
+
+ //the document uri is related to the specific d:content property available in the node
+ //we want to ingest each content stream that are nested in a single node
+ String documentURI = binary.getUrl();
+ activities.ingestDocumentWithException(documentIdentifier, id, versionString, documentURI, rd);
+ fileLengthLong = new Long(fileLength);
}
- } catch (InterruptedIOException e) {
- errorCode = "Interrupted error";
+
+ AuthenticationUtils.endSession();
+
+ } catch (ParseException e) {
+ errorCode = "PARSEEXCEPTION";
errorDesc = e.getMessage();
- throw new ManifoldCFException(e.getMessage(), e,
- ManifoldCFException.INTERRUPTED);
+ Logging.connectors.warn(
+ "Alfresco: Error during the reading process of dates: "
+ + e.getMessage(), e);
+ handleParseException(e);
} catch (IOException e) {
- errorCode = "IO ERROR";
- errorDesc = e.getMessage();
+ if (e instanceof java.io.InterruptedIOException && !(e instanceof java.net.SocketTimeoutException))
+ errorCode = null;
+ else {
+ errorCode = activities.IOEXCEPTION;
+ errorDesc = e.getMessage();
+ }
Logging.connectors.warn(
- "Alfresco: IOException closing file input stream: "
+ "Alfresco: IOException: "
+ e.getMessage(), e);
handleIOException(e);
+ } finally {
+ session = null;
+ try {
+ if(is!=null){
+ is.close();
+ }
+ } catch (InterruptedIOException e) {
+ errorCode = null;
+ throw new ManifoldCFException(e.getMessage(), e,
+ ManifoldCFException.INTERRUPTED);
+ } catch (IOException e) {
+ errorCode = activities.IOEXCEPTION;
+ errorDesc = e.getMessage();
+ Logging.connectors.warn(
+ "Alfresco: IOException closing file input stream: "
+ + e.getMessage(), e);
+ handleIOException(e);
+ }
}
-
- session = null;
-
- activities.recordActivity(new Long(startTime), ACTIVITY_READ,
- fileLength, nodeReference, errorCode, errorDesc, null);
- }
+ }
+ } finally {
+ if (errorCode != null)
+ activities.recordActivity(new Long(startTime), ACTIVITY_READ,
+ fileLengthLong, nodeReference, errorCode, errorDesc, null);
}
}
}
Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IHistoryActivity.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IHistoryActivity.java?rev=1633193&r1=1633192&r2=1633193&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IHistoryActivity.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IHistoryActivity.java Mon Oct 20 18:05:19 2014
@@ -33,6 +33,7 @@ public interface IHistoryActivity
public static final String EXCLUDED_MIMETYPE = IOutputHistoryActivity.EXCLUDED_MIMETYPE;
public static final String EXCLUDED_DATE = IOutputHistoryActivity.EXCLUDED_DATE;
public static final String EXCEPTION = IOutputHistoryActivity.EXCEPTION;
+ public static final String IOEXCEPTION = IOutputHistoryActivity.IOEXCEPTION;
/** Record time-stamped information about the activity of the connector.
*@param startTime is either null or the time since the start of epoch in milliseconds (Jan 1, 1970). Every