You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/08 14:05:03 UTC
svn commit: r1630077 - in /manifoldcf/branches/CONNECTORS-1067/connectors:
alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/
cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis...
Author: kwright
Date: Wed Oct 8 12:05:03 2014
New Revision: 1630077
URL: http://svn.apache.org/r1630077
Log:
Hook up date check in alfresco-webscript, cmis, and sharepoint connectors
Modified:
manifoldcf/branches/CONNECTORS-1067/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java
manifoldcf/branches/CONNECTORS-1067/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
manifoldcf/branches/CONNECTORS-1067/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
Modified: manifoldcf/branches/CONNECTORS-1067/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java?rev=1630077&r1=1630076&r2=1630077&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java Wed Oct 8 12:05:03 2014
@@ -271,11 +271,16 @@ public class AlfrescoConnector extends B
continue;
}
- if (mimeType != null && !activities.checkMimeTypeIndexable(mimeType)) {
+ if (!activities.checkMimeTypeIndexable(mimeType)) {
activities.noDocument(doc, documentVersion);
continue;
}
+ if (!activities.checkDateIndexable(modifiedDate)) {
+ activities.noDocument(doc, documentVersion);
+ continue;
+ }
+
RepositoryDocument rd = new RepositoryDocument();
rd.addField(FIELD_NODEREF, nodeRef);
rd.addField(FIELD_TYPE, type);
Modified: manifoldcf/branches/CONNECTORS-1067/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java?rev=1630077&r1=1630076&r2=1630077&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java Wed Oct 8 12:05:03 2014
@@ -1143,171 +1143,198 @@ public class CmisRepositoryConnector ext
activities.addDocumentReference(child.getId(), documentIdentifier,
RELATIONSHIP_CHILD);
}
- } else if(baseTypeId.equals(CMIS_DOCUMENT_BASE_TYPE)){
- // content ingestion
+ } else if(baseTypeId.equals(CMIS_DOCUMENT_BASE_TYPE)) {
+ // content ingestion
- Document document = (Document) cmisObject;
- long fileLength;
- InputStream is;
- try {
- fileLength = document.getContentStreamLength();
- if (fileLength > 0)
- is = document.getContentStream().getStream();
- else
- is = null;
- } catch (CmisObjectNotFoundException e) {
- // Document gone
- activities.deleteDocument(documentIdentifier);
- continue;
- }
+ Document document = (Document) cmisObject;
- try {
- RepositoryDocument rd = new RepositoryDocument();
Date createdDate = document.getCreationDate().getTime();
Date modifiedDate = document.getLastModificationDate().getTime();
-
- rd.setFileName(document.getContentStreamFileName());
- rd.setMimeType(document.getContentStreamMimeType());
+ long fileLength = document.getContentStreamLength();
+ String fileName = document.getContentStreamFileName();
+ String mimeType = document.getContentStreamMimeType();
+ //documentURI
+ String documentURI = CmisRepositoryConnectorUtils.getDocumentURL(document, session);
+
+ // Do any filtering (which will save us work)
+ if (!activities.checkURLIndexable(documentURI))
+ {
+ activities.noDocument(documentIdentifier,versionString);
+ continue;
+ }
+
+ if (!activities.checkMimeTypeIndexable(mimeType))
+ {
+ activities.noDocument(documentIdentifier,versionString);
+ continue;
+ }
+
+ if (!activities.checkLengthIndexable(fileLength))
+ {
+ activities.noDocument(documentIdentifier,versionString);
+ continue;
+ }
+
+ if (!activities.checkDateIndexable(modifiedDate))
+ {
+ activities.noDocument(documentIdentifier,versionString);
+ continue;
+ }
+
+ RepositoryDocument rd = new RepositoryDocument();
+ rd.setFileName(fileName);
+ rd.setMimeType(mimeType);
rd.setCreatedDate(createdDate);
rd.setModifiedDate(modifiedDate);
-
- //binary
- if(is != null) {
- rd.setBinary(is, fileLength);
- } else {
- rd.setBinary(new NullInputStream(0),0);
+
+ InputStream is;
+ try {
+ if (fileLength > 0)
+ is = document.getContentStream().getStream();
+ else
+ is = null;
+ } catch (CmisObjectNotFoundException e) {
+ // Document gone
+ activities.deleteDocument(documentIdentifier);
+ continue;
}
+
+ try {
+ //binary
+ if(is != null) {
+ rd.setBinary(is, fileLength);
+ } else {
+ rd.setBinary(new NullInputStream(0),0);
+ }
- //properties
- List<Property<?>> properties = document.getProperties();
- String id = StringUtils.EMPTY;
- for (Property<?> property : properties) {
- String propertyId = property.getId();
-
- if(CmisRepositoryConnectorUtils.existsInSelectClause(cmisQuery, propertyId)){
+ //properties
+ List<Property<?>> properties = document.getProperties();
+ String id = StringUtils.EMPTY;
+ for (Property<?> property : properties) {
+ String propertyId = property.getId();
- if (propertyId.endsWith(Constants.PARAM_OBJECT_ID)) {
- id = (String) property.getValue();
-
- if (property.getValue() !=null
- || property.getValues() != null) {
- PropertyType propertyType = property.getType();
-
- switch (propertyType) {
-
- case STRING:
- case ID:
- case URI:
- case HTML:
- if(property.isMultiValued()){
- List<String> htmlPropertyValues = (List<String>) property.getValues();
- for (String htmlPropertyValue : htmlPropertyValues) {
- rd.addField(propertyId, htmlPropertyValue);
- }
- } else {
- String stringValue = (String) property.getValue();
- if(StringUtils.isNotEmpty(stringValue)){
- rd.addField(propertyId, stringValue);
- }
- }
- break;
-
- case BOOLEAN:
- if(property.isMultiValued()){
- List<Boolean> booleanPropertyValues = (List<Boolean>) property.getValues();
- for (Boolean booleanPropertyValue : booleanPropertyValues) {
- rd.addField(propertyId, booleanPropertyValue.toString());
- }
- } else {
- Boolean booleanValue = (Boolean) property.getValue();
- if(booleanValue!=null){
- rd.addField(propertyId, booleanValue.toString());
- }
- }
- break;
-
- case INTEGER:
- if(property.isMultiValued()){
- List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
- for (BigInteger integerPropertyValue : integerPropertyValues) {
- rd.addField(propertyId, integerPropertyValue.toString());
- }
- } else {
- BigInteger integerValue = (BigInteger) property.getValue();
- if(integerValue!=null){
- rd.addField(propertyId, integerValue.toString());
- }
- }
- break;
-
- case DECIMAL:
- if(property.isMultiValued()){
- List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
- for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
- rd.addField(propertyId, decimalPropertyValue.toString());
- }
- } else {
- BigDecimal decimalValue = (BigDecimal) property.getValue();
- if(decimalValue!=null){
- rd.addField(propertyId, decimalValue.toString());
- }
- }
- break;
+ if(CmisRepositoryConnectorUtils.existsInSelectClause(cmisQuery, propertyId)){
+
+ if (propertyId.endsWith(Constants.PARAM_OBJECT_ID)) {
+ id = (String) property.getValue();
- case DATETIME:
- if(property.isMultiValued()){
- List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
- for (GregorianCalendar datePropertyValue : datePropertyValues) {
- rd.addField(propertyId,
- ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
- }
- } else {
- GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
- if(dateValue!=null){
- rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
- }
+ if (property.getValue() !=null
+ || property.getValues() != null) {
+ PropertyType propertyType = property.getType();
+
+ switch (propertyType) {
+
+ case STRING:
+ case ID:
+ case URI:
+ case HTML:
+ if(property.isMultiValued()){
+ List<String> htmlPropertyValues = (List<String>) property.getValues();
+ for (String htmlPropertyValue : htmlPropertyValues) {
+ rd.addField(propertyId, htmlPropertyValue);
+ }
+ } else {
+ String stringValue = (String) property.getValue();
+ if(StringUtils.isNotEmpty(stringValue)){
+ rd.addField(propertyId, stringValue);
+ }
+ }
+ break;
+
+ case BOOLEAN:
+ if(property.isMultiValued()){
+ List<Boolean> booleanPropertyValues = (List<Boolean>) property.getValues();
+ for (Boolean booleanPropertyValue : booleanPropertyValues) {
+ rd.addField(propertyId, booleanPropertyValue.toString());
+ }
+ } else {
+ Boolean booleanValue = (Boolean) property.getValue();
+ if(booleanValue!=null){
+ rd.addField(propertyId, booleanValue.toString());
+ }
+ }
+ break;
+
+ case INTEGER:
+ if(property.isMultiValued()){
+ List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
+ for (BigInteger integerPropertyValue : integerPropertyValues) {
+ rd.addField(propertyId, integerPropertyValue.toString());
+ }
+ } else {
+ BigInteger integerValue = (BigInteger) property.getValue();
+ if(integerValue!=null){
+ rd.addField(propertyId, integerValue.toString());
+ }
+ }
+ break;
+
+ case DECIMAL:
+ if(property.isMultiValued()){
+ List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
+ for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
+ rd.addField(propertyId, decimalPropertyValue.toString());
+ }
+ } else {
+ BigDecimal decimalValue = (BigDecimal) property.getValue();
+ if(decimalValue!=null){
+ rd.addField(propertyId, decimalValue.toString());
+ }
+ }
+ break;
+
+ case DATETIME:
+ if(property.isMultiValued()){
+ List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
+ for (GregorianCalendar datePropertyValue : datePropertyValues) {
+ rd.addField(propertyId,
+ ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
+ }
+ } else {
+ GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
+ if(dateValue!=null){
+ rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
+ }
+ }
+ break;
+
+ default:
+ break;
}
- break;
-
- default:
- break;
}
+
}
-
+
}
-
}
- }
-
- //ingestion
- //documentURI
- String documentURI = CmisRepositoryConnectorUtils.getDocumentURL(document, session);
-
- try {
- activities.ingestDocumentWithException(documentIdentifier, versionString, documentURI, rd);
- } catch (IOException e) {
- errorCode = "IO ERROR";
- errorDesc = e.getMessage();
- handleIOException(e, "reading file input stream");
- }
- } finally {
- try {
- if(is!=null){
- is.close();
+ //ingestion
+
+
+ try {
+ activities.ingestDocumentWithException(documentIdentifier, versionString, documentURI, rd);
+ } catch (IOException e) {
+ errorCode = "IO ERROR";
+ errorDesc = e.getMessage();
+ handleIOException(e, "reading file input stream");
}
- } catch (IOException e) {
- errorCode = "IO ERROR";
- errorDesc = e.getMessage();
- handleIOException(e, "closing file input stream");
} finally {
- activities.recordActivity(new Long(startTime), ACTIVITY_READ,
- fileLength, documentIdentifier, errorCode, errorDesc, null);
+ try {
+ if(is!=null){
+ is.close();
+ }
+ } catch (IOException e) {
+ errorCode = "IO ERROR";
+ errorDesc = e.getMessage();
+ handleIOException(e, "closing file input stream");
+ } finally {
+ activities.recordActivity(new Long(startTime), ACTIVITY_READ,
+ fileLength, documentIdentifier, errorCode, errorDesc, null);
+ }
}
}
- }
- else
- activities.deleteDocument(documentIdentifier);
+ else
+ activities.noDocument(documentIdentifier,versionString);
}
}
Modified: manifoldcf/branches/CONNECTORS-1067/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java?rev=1630077&r1=1630076&r2=1630077&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java Wed Oct 8 12:05:03 2014
@@ -1634,226 +1634,229 @@ public class SharePointRepository extend
throws ManifoldCFException, ServiceInterruption
{
// Before we fetch, confirm that the output connector will accept the document
- if (activities.checkURLIndexable(fileUrl))
+ if (!activities.checkURLIndexable(fileUrl))
{
- // Also check mime type
- String contentType = mapExtensionToMimeType(documentIdentifier);
- if (activities.checkMimeTypeIndexable(contentType))
+ // URL failed
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says URL '"+fileUrl+"' is not indexable");
+ return false;
+ }
+
+ // Also check mime type
+ String contentType = mapExtensionToMimeType(documentIdentifier);
+ if (!activities.checkMimeTypeIndexable(contentType))
+ {
+ // Mime type failed
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says mime type '"+((contentType==null)?"null":contentType)+"' is not indexable");
+ return false;
+ }
+
+ // Now check date stamp
+ if (!activities.checkDateIndexable(modifiedDate))
+ {
+ // Date failed
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says date '"+((modifiedDate==null)?"null":modifiedDate)+"' is not indexable");
+ return false;
+ }
+
+ // Set stuff up for fetch activity logging
+ long startFetchTime = System.currentTimeMillis();
+ try
+ {
+ // Read the document into a local temporary file, so I get a reliable length.
+ File tempFile = File.createTempFile("__shp__",".tmp");
+ try
{
- // Set stuff up for fetch activity logging
- long startFetchTime = System.currentTimeMillis();
+ // Open the output stream
+ OutputStream os = new FileOutputStream(tempFile);
try
{
- // Read the document into a local temporary file, so I get a reliable length.
- File tempFile = File.createTempFile("__shp__",".tmp");
+ // Catch all exceptions having to do with reading the document
try
{
- // Open the output stream
- OutputStream os = new FileOutputStream(tempFile);
- try
- {
- // Catch all exceptions having to do with reading the document
- try
- {
- ExecuteMethodThread emt = new ExecuteMethodThread(httpClient, fetchUrl, os);
- emt.start();
- int returnCode = emt.finishUp();
+ ExecuteMethodThread emt = new ExecuteMethodThread(httpClient, fetchUrl, os);
+ emt.start();
+ int returnCode = emt.finishUp();
- if (returnCode == 404 || returnCode == 401 || returnCode == 400 || returnCode == 415)
- {
- // Well, sharepoint thought the document was there, but it really isn't, so delete it.
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("SharePoint: Document at '"+fileUrl+"' failed to fetch with code "+Integer.toString(returnCode)+", deleting");
- activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
- null,documentIdentifier,"Not found",Integer.toString(returnCode),null);
- return false;
- }
- else if (returnCode != 200)
- {
- activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
- null,documentIdentifier,"Error","Http status "+Integer.toString(returnCode),null);
- throw new ManifoldCFException("Error fetching document '"+fileUrl+"': "+Integer.toString(returnCode));
- }
-
- // Log the normal fetch activity
- activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
- new Long(tempFile.length()),documentIdentifier,"Success",null,null);
-
- }
- catch (InterruptedException e)
- {
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
- }
- catch (java.net.SocketTimeoutException e)
- {
- activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
- new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
- Logging.connectors.warn("SharePoint: SocketTimeoutException thrown: "+e.getMessage(),e);
- long currentTime = System.currentTimeMillis();
- throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
- currentTime + 12 * 60 * 60000L,-1,true);
- }
- catch (org.apache.http.conn.ConnectTimeoutException e)
- {
- activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
- new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
- Logging.connectors.warn("SharePoint: ConnectTimeoutException thrown: "+e.getMessage(),e);
- long currentTime = System.currentTimeMillis();
- throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
- currentTime + 12 * 60 * 60000L,-1,true);
- }
- catch (InterruptedIOException e)
- {
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
- }
- catch (IllegalArgumentException e)
- {
- Logging.connectors.error("SharePoint: Illegal argument", e);
- activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
- new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
- throw new ManifoldCFException("SharePoint: Illegal argument: "+e.getMessage(),e);
- }
- catch (org.apache.http.HttpException e)
- {
- Logging.connectors.warn("SharePoint: HttpException thrown",e);
- activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
- new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
- long currentTime = System.currentTimeMillis();
- throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
- currentTime + 12 * 60 * 60000L,-1,true);
- }
- catch (IOException e)
- {
- activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
- new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
- Logging.connectors.warn("SharePoint: IOException thrown: "+e.getMessage(),e);
- long currentTime = System.currentTimeMillis();
- throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
- currentTime + 12 * 60 * 60000L,-1,true);
- }
- }
- finally
+ if (returnCode == 404 || returnCode == 401 || returnCode == 400 || returnCode == 415)
{
- os.close();
+ // Well, sharepoint thought the document was there, but it really isn't, so delete it.
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("SharePoint: Document at '"+fileUrl+"' failed to fetch with code "+Integer.toString(returnCode)+", deleting");
+ activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+ null,documentIdentifier,"Not found",Integer.toString(returnCode),null);
+ return false;
}
-
- // Ingest the document
- long documentLength = tempFile.length();
- if (activities.checkLengthIndexable(documentLength))
+ else if (returnCode != 200)
{
- InputStream is = new FileInputStream(tempFile);
- try
- {
- RepositoryDocument data = new RepositoryDocument();
- data.setBinary( is, documentLength );
-
- data.setFileName(mapToFileName(documentIdentifier));
-
- if (contentType != null)
- data.setMimeType(contentType);
-
- setDataACLs(data,accessTokens,denyTokens);
-
- setPathAttribute(data,sDesc,documentIdentifier);
-
- if (modifiedDate != null)
- data.setModifiedDate(modifiedDate);
- if (createdDate != null)
- data.setCreatedDate(createdDate);
+ activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+ null,documentIdentifier,"Error","Http status "+Integer.toString(returnCode),null);
+ throw new ManifoldCFException("Error fetching document '"+fileUrl+"': "+Integer.toString(returnCode));
+ }
- if (metadataValues != null)
- {
- Iterator<String> iter = metadataValues.keySet().iterator();
- while (iter.hasNext())
- {
- String fieldName = iter.next();
- String fieldData = metadataValues.get(fieldName);
- data.addField(fieldName,fieldData);
- }
- }
- data.addField("GUID",guid);
+ // Log the normal fetch activity
+ activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+ new Long(tempFile.length()),documentIdentifier,"Success",null,null);
- try
- {
- activities.ingestDocumentWithException( documentIdentifier, version, fileUrl , data );
- }
- catch (IOException e)
- {
- handleIOException(e,"reading document");
- }
- return true;
- }
- finally
- {
- try
- {
- is.close();
- }
- catch (java.net.SocketTimeoutException e)
- {
- // This is not fatal
- Logging.connectors.debug("SharePoint: Timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
- }
- catch (org.apache.http.conn.ConnectTimeoutException e)
- {
- // This is not fatal
- Logging.connectors.debug("SharePoint: Connect timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
- }
- catch (InterruptedIOException e)
- {
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
- }
- catch (IOException e)
- {
- // This is not fatal
- Logging.connectors.debug("SharePoint: Server closed connection before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
- }
- }
- }
- else
- {
- // Document too long
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("SharePoint: Document '"+documentIdentifier+"' was too long, according to output connector");
- return false;
- }
}
- finally
+ catch (InterruptedException e)
+ {
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ }
+ catch (java.net.SocketTimeoutException e)
+ {
+ activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+ new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+ Logging.connectors.warn("SharePoint: SocketTimeoutException thrown: "+e.getMessage(),e);
+ long currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
+ currentTime + 12 * 60 * 60000L,-1,true);
+ }
+ catch (org.apache.http.conn.ConnectTimeoutException e)
+ {
+ activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+ new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+ Logging.connectors.warn("SharePoint: ConnectTimeoutException thrown: "+e.getMessage(),e);
+ long currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
+ currentTime + 12 * 60 * 60000L,-1,true);
+ }
+ catch (InterruptedIOException e)
+ {
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ }
+ catch (IllegalArgumentException e)
+ {
+ Logging.connectors.error("SharePoint: Illegal argument", e);
+ activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+ new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+ throw new ManifoldCFException("SharePoint: Illegal argument: "+e.getMessage(),e);
+ }
+ catch (org.apache.http.HttpException e)
+ {
+ Logging.connectors.warn("SharePoint: HttpException thrown",e);
+ activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+ new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+ long currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
+ currentTime + 12 * 60 * 60000L,-1,true);
+ }
+ catch (IOException e)
{
- tempFile.delete();
+ activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+ new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+ Logging.connectors.warn("SharePoint: IOException thrown: "+e.getMessage(),e);
+ long currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
+ currentTime + 12 * 60 * 60000L,-1,true);
}
}
- catch (java.net.SocketTimeoutException e)
+ finally
{
- throw new ManifoldCFException("Socket timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+ os.close();
}
- catch (org.apache.http.conn.ConnectTimeoutException e)
+
+ // Ingest the document
+ long documentLength = tempFile.length();
+ if (!activities.checkLengthIndexable(documentLength))
{
- throw new ManifoldCFException("Connect timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+ // Document too long
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("SharePoint: Document '"+documentIdentifier+"' was too long, according to output connector");
+ return false;
}
- catch (InterruptedIOException e)
+
+ InputStream is = new FileInputStream(tempFile);
+ try
{
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ RepositoryDocument data = new RepositoryDocument();
+ data.setBinary( is, documentLength );
+
+ data.setFileName(mapToFileName(documentIdentifier));
+
+ if (contentType != null)
+ data.setMimeType(contentType);
+
+ setDataACLs(data,accessTokens,denyTokens);
+
+ setPathAttribute(data,sDesc,documentIdentifier);
+
+ if (modifiedDate != null)
+ data.setModifiedDate(modifiedDate);
+ if (createdDate != null)
+ data.setCreatedDate(createdDate);
+
+ if (metadataValues != null)
+ {
+ Iterator<String> iter = metadataValues.keySet().iterator();
+ while (iter.hasNext())
+ {
+ String fieldName = iter.next();
+ String fieldData = metadataValues.get(fieldName);
+ data.addField(fieldName,fieldData);
+ }
+ }
+ data.addField("GUID",guid);
+
+ try
+ {
+ activities.ingestDocumentWithException( documentIdentifier, version, fileUrl , data );
+ }
+ catch (IOException e)
+ {
+ handleIOException(e,"reading document");
+ }
+ return true;
}
- catch (IOException e)
+ finally
{
- throw new ManifoldCFException("IO error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+ try
+ {
+ is.close();
+ }
+ catch (java.net.SocketTimeoutException e)
+ {
+ // This is not fatal
+ Logging.connectors.debug("SharePoint: Timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
+ }
+ catch (org.apache.http.conn.ConnectTimeoutException e)
+ {
+ // This is not fatal
+ Logging.connectors.debug("SharePoint: Connect timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
+ }
+ catch (InterruptedIOException e)
+ {
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ }
+ catch (IOException e)
+ {
+ // This is not fatal
+ Logging.connectors.debug("SharePoint: Server closed connection before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
+ }
}
}
- else
+ finally
{
- // Mime type failed
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says mime type '"+((contentType==null)?"null":contentType)+"' is not indexable");
- return false;
+ tempFile.delete();
}
}
- else
+ catch (java.net.SocketTimeoutException e)
{
- // URL failed
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says URL '"+fileUrl+"' is not indexable");
- return false;
+ throw new ManifoldCFException("Socket timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+ }
+ catch (org.apache.http.conn.ConnectTimeoutException e)
+ {
+ throw new ManifoldCFException("Connect timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+ }
+ catch (InterruptedIOException e)
+ {
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ }
+ catch (IOException e)
+ {
+ throw new ManifoldCFException("IO error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
}
}