You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/21 12:16:35 UTC
svn commit: r1633337 - in /manifoldcf/branches/dev_1x: ./
connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
Author: kwright
Date: Tue Oct 21 10:16:35 2014
New Revision: 1633337
URL: http://svn.apache.org/r1633337
Log:
Pull up more CONNECTORS-1077 fixes from trunk
Modified:
manifoldcf/branches/dev_1x/ (props changed)
manifoldcf/branches/dev_1x/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
Propchange: manifoldcf/branches/dev_1x/
------------------------------------------------------------------------------
Merged /manifoldcf/trunk:r1633336
Modified: manifoldcf/branches/dev_1x/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java?rev=1633337&r1=1633336&r2=1633337&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java (original)
+++ manifoldcf/branches/dev_1x/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java Tue Oct 21 10:16:35 2014
@@ -1099,7 +1099,6 @@ public class CmisRepositoryConnector ext
}
}
- getSession();
for (String documentIdentifier : documentIdentifiers) {
@@ -1107,6 +1106,8 @@ public class CmisRepositoryConnector ext
Logging.connectors.debug("CMIS: Processing document identifier '"
+ documentIdentifier + "'");
+ getSession();
+
// Load the object. If this fails, it has been deleted.
CmisObject cmisObject;
try {
@@ -1151,214 +1152,236 @@ public class CmisRepositoryConnector ext
if (versionString.length() == 0 || activities.checkDocumentNeedsReindexing(documentIdentifier,versionString)) {
// Index this document
- String errorCode = "OK";
- String errorDesc = StringUtils.EMPTY;
+ String errorCode = null;
+ String errorDesc = null;
+ Long fileLengthLong = null;
long startTime = System.currentTimeMillis();
-
- String baseTypeId = cmisObject.getBaseType().getId();
-
- if (baseTypeId.equals(CMIS_FOLDER_BASE_TYPE)) {
+ try {
+ String baseTypeId = cmisObject.getBaseType().getId();
- // adding all the children for a folder
+ if (baseTypeId.equals(CMIS_FOLDER_BASE_TYPE)) {
- Folder folder = (Folder) cmisObject;
- ItemIterable<CmisObject> children = folder.getChildren();
- for (CmisObject child : children) {
- activities.addDocumentReference(child.getId(), documentIdentifier,
- RELATIONSHIP_CHILD);
- }
- } else if(baseTypeId.equals(CMIS_DOCUMENT_BASE_TYPE)) {
- // content ingestion
+ // adding all the children for a folder
- Document document = (Document) cmisObject;
-
- Date createdDate = document.getCreationDate().getTime();
- Date modifiedDate = document.getLastModificationDate().getTime();
- long fileLength = document.getContentStreamLength();
- String fileName = document.getContentStreamFileName();
- String mimeType = document.getContentStreamMimeType();
- //documentURI
- String documentURI = CmisRepositoryConnectorUtils.getDocumentURL(document, session);
-
- // Do any filtering (which will save us work)
- if (!activities.checkURLIndexable(documentURI))
- {
- activities.noDocument(documentIdentifier,versionString);
- continue;
- }
-
- if (!activities.checkMimeTypeIndexable(mimeType))
- {
- activities.noDocument(documentIdentifier,versionString);
- continue;
- }
+ Folder folder = (Folder) cmisObject;
+ ItemIterable<CmisObject> children = folder.getChildren();
+ for (CmisObject child : children) {
+ activities.addDocumentReference(child.getId(), documentIdentifier,
+ RELATIONSHIP_CHILD);
+ }
+ } else if(baseTypeId.equals(CMIS_DOCUMENT_BASE_TYPE)) {
+ // content ingestion
- if (!activities.checkLengthIndexable(fileLength))
- {
- activities.noDocument(documentIdentifier,versionString);
- continue;
- }
-
- if (!activities.checkDateIndexable(modifiedDate))
- {
- activities.noDocument(documentIdentifier,versionString);
- continue;
- }
-
- RepositoryDocument rd = new RepositoryDocument();
- rd.setFileName(fileName);
- rd.setMimeType(mimeType);
- rd.setCreatedDate(createdDate);
- rd.setModifiedDate(modifiedDate);
-
- InputStream is;
- try {
- if (fileLength > 0)
- is = document.getContentStream().getStream();
- else
- is = null;
- } catch (CmisObjectNotFoundException e) {
- // Document gone
- activities.deleteDocument(documentIdentifier);
- continue;
- }
+ Document document = (Document) cmisObject;
- try {
- //binary
- if(is != null) {
- rd.setBinary(is, fileLength);
- } else {
- rd.setBinary(new NullInputStream(0),0);
+ Date createdDate = document.getCreationDate().getTime();
+ Date modifiedDate = document.getLastModificationDate().getTime();
+ long fileLength = document.getContentStreamLength();
+ String fileName = document.getContentStreamFileName();
+ String mimeType = document.getContentStreamMimeType();
+ //documentURI
+ String documentURI = CmisRepositoryConnectorUtils.getDocumentURL(document, session);
+
+ // Do any filtering (which will save us work)
+ if (!activities.checkURLIndexable(documentURI))
+ {
+ activities.noDocument(documentIdentifier,versionString);
+ errorCode = activities.EXCLUDED_URL;
+ errorDesc = "Excluding due to URL ('"+documentURI+"')";
+ continue;
+ }
+
+ if (!activities.checkMimeTypeIndexable(mimeType))
+ {
+ activities.noDocument(documentIdentifier,versionString);
+ errorCode = activities.EXCLUDED_MIMETYPE;
+ errorDesc = "Excluding due to mime type ("+mimeType+")";
+ continue;
}
- //properties
- List<Property<?>> properties = document.getProperties();
- String id = StringUtils.EMPTY;
- for (Property<?> property : properties) {
- String propertyId = property.getId();
+ if (!activities.checkLengthIndexable(fileLength))
+ {
+ activities.noDocument(documentIdentifier,versionString);
+ errorCode = activities.EXCLUDED_LENGTH;
+ errorDesc = "Excluding due to length ("+fileLength+")";
+ continue;
+ }
+
+ if (!activities.checkDateIndexable(modifiedDate))
+ {
+ activities.noDocument(documentIdentifier,versionString);
+ errorCode = activities.EXCLUDED_DATE;
+ errorDesc = "Excluding due to date ("+modifiedDate+")";
+ continue;
+ }
+
+ RepositoryDocument rd = new RepositoryDocument();
+ rd.setFileName(fileName);
+ rd.setMimeType(mimeType);
+ rd.setCreatedDate(createdDate);
+ rd.setModifiedDate(modifiedDate);
- if(CmisRepositoryConnectorUtils.existsInSelectClause(cmisQuery, propertyId)){
+ InputStream is;
+ try {
+ if (fileLength > 0)
+ is = document.getContentStream().getStream();
+ else
+ is = null;
+ } catch (CmisObjectNotFoundException e) {
+ // Document gone
+ activities.deleteDocument(documentIdentifier);
+ continue;
+ }
+
+ try {
+ //binary
+ if(is != null) {
+ rd.setBinary(is, fileLength);
+ } else {
+ rd.setBinary(new NullInputStream(0),0);
+ }
+
+ //properties
+ List<Property<?>> properties = document.getProperties();
+ String id = StringUtils.EMPTY;
+ for (Property<?> property : properties) {
+ String propertyId = property.getId();
- if (propertyId.endsWith(Constants.PARAM_OBJECT_ID)) {
- id = (String) property.getValue();
-
- if (property.getValue() !=null
- || property.getValues() != null) {
- PropertyType propertyType = property.getType();
-
- switch (propertyType) {
-
- case STRING:
- case ID:
- case URI:
- case HTML:
- if(property.isMultiValued()){
- List<String> htmlPropertyValues = (List<String>) property.getValues();
- for (String htmlPropertyValue : htmlPropertyValues) {
- rd.addField(propertyId, htmlPropertyValue);
- }
- } else {
- String stringValue = (String) property.getValue();
- if(StringUtils.isNotEmpty(stringValue)){
- rd.addField(propertyId, stringValue);
- }
- }
- break;
-
- case BOOLEAN:
- if(property.isMultiValued()){
- List<Boolean> booleanPropertyValues = (List<Boolean>) property.getValues();
- for (Boolean booleanPropertyValue : booleanPropertyValues) {
- rd.addField(propertyId, booleanPropertyValue.toString());
- }
- } else {
- Boolean booleanValue = (Boolean) property.getValue();
- if(booleanValue!=null){
- rd.addField(propertyId, booleanValue.toString());
- }
- }
- break;
+ if(CmisRepositoryConnectorUtils.existsInSelectClause(cmisQuery, propertyId)){
+
+ if (propertyId.endsWith(Constants.PARAM_OBJECT_ID)) {
+ id = (String) property.getValue();
- case INTEGER:
- if(property.isMultiValued()){
- List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
- for (BigInteger integerPropertyValue : integerPropertyValues) {
- rd.addField(propertyId, integerPropertyValue.toString());
- }
- } else {
- BigInteger integerValue = (BigInteger) property.getValue();
- if(integerValue!=null){
- rd.addField(propertyId, integerValue.toString());
+ if (property.getValue() !=null
+ || property.getValues() != null) {
+ PropertyType propertyType = property.getType();
+
+ switch (propertyType) {
+
+ case STRING:
+ case ID:
+ case URI:
+ case HTML:
+ if(property.isMultiValued()){
+ List<String> htmlPropertyValues = (List<String>) property.getValues();
+ for (String htmlPropertyValue : htmlPropertyValues) {
+ rd.addField(propertyId, htmlPropertyValue);
+ }
+ } else {
+ String stringValue = (String) property.getValue();
+ if(StringUtils.isNotEmpty(stringValue)){
+ rd.addField(propertyId, stringValue);
+ }
}
- }
- break;
-
- case DECIMAL:
- if(property.isMultiValued()){
- List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
- for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
- rd.addField(propertyId, decimalPropertyValue.toString());
+ break;
+
+ case BOOLEAN:
+ if(property.isMultiValued()){
+ List<Boolean> booleanPropertyValues = (List<Boolean>) property.getValues();
+ for (Boolean booleanPropertyValue : booleanPropertyValues) {
+ rd.addField(propertyId, booleanPropertyValue.toString());
+ }
+ } else {
+ Boolean booleanValue = (Boolean) property.getValue();
+ if(booleanValue!=null){
+ rd.addField(propertyId, booleanValue.toString());
+ }
}
- } else {
- BigDecimal decimalValue = (BigDecimal) property.getValue();
- if(decimalValue!=null){
- rd.addField(propertyId, decimalValue.toString());
+ break;
+
+ case INTEGER:
+ if(property.isMultiValued()){
+ List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
+ for (BigInteger integerPropertyValue : integerPropertyValues) {
+ rd.addField(propertyId, integerPropertyValue.toString());
+ }
+ } else {
+ BigInteger integerValue = (BigInteger) property.getValue();
+ if(integerValue!=null){
+ rd.addField(propertyId, integerValue.toString());
+ }
}
- }
- break;
-
- case DATETIME:
- if(property.isMultiValued()){
- List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
- for (GregorianCalendar datePropertyValue : datePropertyValues) {
- rd.addField(propertyId,
- ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
+ break;
+
+ case DECIMAL:
+ if(property.isMultiValued()){
+ List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
+ for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
+ rd.addField(propertyId, decimalPropertyValue.toString());
+ }
+ } else {
+ BigDecimal decimalValue = (BigDecimal) property.getValue();
+ if(decimalValue!=null){
+ rd.addField(propertyId, decimalValue.toString());
+ }
}
- } else {
- GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
- if(dateValue!=null){
- rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
+ break;
+
+ case DATETIME:
+ if(property.isMultiValued()){
+ List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
+ for (GregorianCalendar datePropertyValue : datePropertyValues) {
+ rd.addField(propertyId,
+ ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
+ }
+ } else {
+ GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
+ if(dateValue!=null){
+ rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
+ }
}
+ break;
+
+ default:
+ break;
}
- break;
-
- default:
- break;
}
+
}
-
+
}
-
}
- }
-
- //ingestion
-
- try {
- activities.ingestDocumentWithException(documentIdentifier, versionString, documentURI, rd);
- } catch (IOException e) {
- errorCode = "IO ERROR";
- errorDesc = e.getMessage();
- handleIOException(e, "reading file input stream");
- }
- } finally {
- try {
- if(is!=null){
- is.close();
+ //ingestion
+
+
+ try {
+ activities.ingestDocumentWithException(documentIdentifier, versionString, documentURI, rd);
+ fileLengthLong = new Long(fileLength);
+ errorCode = "OK";
+ } catch (IOException e) {
+ if (e instanceof InterruptedIOException && !(e instanceof java.net.SocketTimeoutException))
+ errorCode = null;
+ else
+ errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+ errorDesc = e.getMessage();
+ handleIOException(e, "reading file input stream");
}
- } catch (IOException e) {
- errorCode = "IO ERROR";
- errorDesc = e.getMessage();
- handleIOException(e, "closing file input stream");
} finally {
- activities.recordActivity(new Long(startTime), ACTIVITY_READ,
- fileLength, documentIdentifier, errorCode, errorDesc, null);
+ try {
+ if(is!=null){
+ is.close();
+ }
+ } catch (IOException e) {
+ if (e instanceof InterruptedIOException && !(e instanceof java.net.SocketTimeoutException))
+ errorCode = null;
+ else
+ errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+ errorDesc = e.getMessage();
+ handleIOException(e, "closing file input stream");
+ }
}
+ } else {
+ // Unrecognized document type
+ activities.noDocument(documentIdentifier,versionString);
+ errorCode = "UNKNOWNTYPE";
+ errorDesc = "Document type is unrecognized: '"+baseTypeId+"'";
}
+ } finally {
+ if (errorCode != null)
+ activities.recordActivity(new Long(startTime), ACTIVITY_READ,
+ fileLengthLong, documentIdentifier, errorCode, errorDesc, null);
}
- else
- activities.noDocument(documentIdentifier,versionString);
}
}