You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/08 19:54:48 UTC
svn commit: r1630188 [2/2] - in /manifoldcf/trunk: ./
connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/
connectors/alfresco-webscript/connector/src/test/java/org/apache/manifoldcf/crawler/...
Modified: manifoldcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java (original)
+++ manifoldcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java Wed Oct 8 17:54:47 2014
@@ -4185,466 +4185,485 @@ public class LivelinkConnector extends o
try
{
// Check URL first
- if (activities.checkURLIndexable(viewHttpAddress))
+ if (!activities.checkURLIndexable(viewHttpAddress))
{
+ // Document not ingestable due to URL
+ resultDescription = "URL ("+viewHttpAddress+") was rejected by output connector";
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its URL ("+viewHttpAddress+") was rejected by output connector");
+ resultCode = "URLEXCLUSION";
+ activities.noDocument(documentIdentifier,version);
+ return;
+ }
+
+ // Add general metadata
+ ObjectInformation objInfo = llc.getObjectInformation(vol,objID);
+ VersionInformation versInfo = llc.getVersionInformation(vol,objID,0);
+ if (!objInfo.exists())
+ {
+ resultCode = "OBJECTNOTFOUND";
+ Logging.connectors.debug("Livelink: No object "+contextMsg+": not ingesting");
+ return;
+ }
+ if (!versInfo.exists())
+ {
+ resultCode = "VERSIONNOTFOUND";
+ Logging.connectors.debug("Livelink: No version data "+contextMsg+": not ingesting");
+ return;
+ }
- // Add general metadata
- ObjectInformation objInfo = llc.getObjectInformation(vol,objID);
- VersionInformation versInfo = llc.getVersionInformation(vol,objID,0);
- if (!objInfo.exists())
- {
- resultCode = "OBJECTNOTFOUND";
- Logging.connectors.debug("Livelink: No object "+contextMsg+": not ingesting");
- return;
- }
- if (!versInfo.exists())
- {
- resultCode = "VERSIONNOTFOUND";
- Logging.connectors.debug("Livelink: No version data "+contextMsg+": not ingesting");
- return;
- }
+ String mimeType = versInfo.getMimeType();
+ if (!activities.checkMimeTypeIndexable(mimeType))
+ {
+ // Document not indexable because of its mime type
+ resultDescription = "Mime type ("+mimeType+") was rejected by output connector";
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its mime type ("+mimeType+") was rejected by output connector");
+ resultCode = "MIMETYPEEXCLUSION";
+ activities.noDocument(documentIdentifier,version);
+ return;
+ }
+
+ Long dataSize = versInfo.getDataSize();
+ if (dataSize == null)
+ {
+ // Document had no length
+ resultDescription = "Document had no length";
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because it had no length");
+ resultCode = "DOCUMENTNOLENGTH";
+ activities.noDocument(documentIdentifier,version);
+ return;
+ }
+
+ if (!activities.checkLengthIndexable(dataSize.longValue()))
+ {
+ // Document not indexable because of its length
+ resultDescription = "Document length ("+dataSize+") was rejected by output connector";
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its length ("+dataSize+") was rejected by output connector");
+ resultCode = "DOCUMENTTOOLONG";
+ activities.noDocument(documentIdentifier,version);
+ return;
+ }
- String mimeType = versInfo.getMimeType();
- if (activities.checkMimeTypeIndexable(mimeType))
- {
- Long dataSize = versInfo.getDataSize();
- if (dataSize != null && activities.checkLengthIndexable(dataSize.longValue()))
- {
- String fileName = versInfo.getFileName();
- Date creationDate = objInfo.getCreationDate();
- Date modifyDate = versInfo.getModifyDate();
- Integer parentID = objInfo.getParentId();
- RepositoryDocument rd = new RepositoryDocument();
+ Date modifyDate = versInfo.getModifyDate();
+ if (!activities.checkDateIndexable(modifyDate))
+ {
+ // Document not indexable because of its date
+ resultDescription = "Document date ("+modifyDate+") was rejected by output connector";
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its date ("+modifyDate+") was rejected by output connector");
+ resultCode = "DOCUMENTBADDATE";
+ activities.noDocument(documentIdentifier,version);
+ return;
+ }
+
+ String fileName = versInfo.getFileName();
+ Date creationDate = objInfo.getCreationDate();
+ Integer parentID = objInfo.getParentId();
+
+
+ RepositoryDocument rd = new RepositoryDocument();
+ // Add general data we need for the output connector
+ if (mimeType != null)
+ rd.setMimeType(mimeType);
+ if (fileName != null)
+ rd.setFileName(fileName);
+ if (creationDate != null)
+ rd.setCreatedDate(creationDate);
+ if (modifyDate != null)
+ rd.setModifiedDate(modifyDate);
- // Add general data we need for the output connector
- if (mimeType != null)
- rd.setMimeType(mimeType);
- if (fileName != null)
- rd.setFileName(fileName);
- if (creationDate != null)
- rd.setCreatedDate(creationDate);
- if (modifyDate != null)
- rd.setModifiedDate(modifyDate);
-
- rd.addField(GENERAL_NAME_FIELD,objInfo.getName());
- rd.addField(GENERAL_DESCRIPTION_FIELD,objInfo.getComments());
- if (creationDate != null)
- rd.addField(GENERAL_CREATIONDATE_FIELD,new Long(creationDate.getTime()).toString());
- if (modifyDate != null)
- rd.addField(GENERAL_MODIFYDATE_FIELD,new Long(modifyDate.getTime()).toString());
- if (parentID != null)
- rd.addField(GENERAL_PARENTID,parentID.toString());
- UserInformation owner = llc.getUserInformation(objInfo.getOwnerId().intValue());
- UserInformation creator = llc.getUserInformation(objInfo.getCreatorId().intValue());
- UserInformation modifier = llc.getUserInformation(versInfo.getOwnerId().intValue());
- if (owner != null)
- rd.addField(GENERAL_OWNER,owner.getName());
- if (creator != null)
- rd.addField(GENERAL_CREATOR,creator.getName());
- if (modifier != null)
- rd.addField(GENERAL_MODIFIER,modifier.getName());
-
- // Iterate over the metadata items. These are organized by category
- // for speed of lookup.
-
- Iterator<MetadataItem> catIter = desc.getItems(categoryPaths);
- while (catIter.hasNext())
+ rd.addField(GENERAL_NAME_FIELD,objInfo.getName());
+ rd.addField(GENERAL_DESCRIPTION_FIELD,objInfo.getComments());
+ if (creationDate != null)
+ rd.addField(GENERAL_CREATIONDATE_FIELD,new Long(creationDate.getTime()).toString());
+ if (modifyDate != null)
+ rd.addField(GENERAL_MODIFYDATE_FIELD,new Long(modifyDate.getTime()).toString());
+ if (parentID != null)
+ rd.addField(GENERAL_PARENTID,parentID.toString());
+ UserInformation owner = llc.getUserInformation(objInfo.getOwnerId().intValue());
+ UserInformation creator = llc.getUserInformation(objInfo.getCreatorId().intValue());
+ UserInformation modifier = llc.getUserInformation(versInfo.getOwnerId().intValue());
+ if (owner != null)
+ rd.addField(GENERAL_OWNER,owner.getName());
+ if (creator != null)
+ rd.addField(GENERAL_CREATOR,creator.getName());
+ if (modifier != null)
+ rd.addField(GENERAL_MODIFIER,modifier.getName());
+
+ // Iterate over the metadata items. These are organized by category
+ // for speed of lookup.
+
+ Iterator<MetadataItem> catIter = desc.getItems(categoryPaths);
+ while (catIter.hasNext())
+ {
+ MetadataItem item = catIter.next();
+ MetadataPathItem pathItem = item.getPathItem();
+ if (pathItem != null)
+ {
+ int catID = pathItem.getCatID();
+ // grab the associated catversion
+ LLValue catVersion = getCatVersion(objID,catID);
+ if (catVersion != null)
+ {
+ // Go through attributes now
+ Iterator<String> attrIter = item.getAttributeNames();
+ while (attrIter.hasNext())
{
- MetadataItem item = catIter.next();
- MetadataPathItem pathItem = item.getPathItem();
- if (pathItem != null)
- {
- int catID = pathItem.getCatID();
- // grab the associated catversion
- LLValue catVersion = getCatVersion(objID,catID);
- if (catVersion != null)
- {
- // Go through attributes now
- Iterator<String> attrIter = item.getAttributeNames();
- while (attrIter.hasNext())
- {
- String attrName = attrIter.next();
- // Create a unique metadata name
- String metadataName = pathItem.getCatName()+":"+attrName;
- // Fetch the metadata and stuff it into the RepositoryData structure
- String[] metadataValue = getAttributeValue(catVersion,attrName);
- if (metadataValue != null)
- rd.addField(metadataName,metadataValue);
- else
- Logging.connectors.warn("Livelink: Metadata attribute '"+metadataName+"' does not seem to exist; please correct the job");
- }
- }
-
- }
+ String attrName = attrIter.next();
+ // Create a unique metadata name
+ String metadataName = pathItem.getCatName()+":"+attrName;
+ // Fetch the metadata and stuff it into the RepositoryData structure
+ String[] metadataValue = getAttributeValue(catVersion,attrName);
+ if (metadataValue != null)
+ rd.addField(metadataName,metadataValue);
+ else
+ Logging.connectors.warn("Livelink: Metadata attribute '"+metadataName+"' does not seem to exist; please correct the job");
}
+ }
+
+ }
+ }
- if (actualAcls != null && denyAcls != null)
- rd.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT,actualAcls,denyAcls);
+ if (actualAcls != null && denyAcls != null)
+ rd.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT,actualAcls,denyAcls);
- // Add the path metadata item into the mix, if enabled
- String pathAttributeName = sDesc.getPathAttributeName();
- if (pathAttributeName != null && pathAttributeName.length() > 0)
- {
- String pathString = sDesc.getPathAttributeValue(documentIdentifier);
- if (pathString != null)
- {
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("Livelink: Path attribute name is '"+pathAttributeName+"'"+contextMsg+", value is '"+pathString+"'");
- rd.addField(pathAttributeName,pathString);
- }
- }
+ // Add the path metadata item into the mix, if enabled
+ String pathAttributeName = sDesc.getPathAttributeName();
+ if (pathAttributeName != null && pathAttributeName.length() > 0)
+ {
+ String pathString = sDesc.getPathAttributeValue(documentIdentifier);
+ if (pathString != null)
+ {
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("Livelink: Path attribute name is '"+pathAttributeName+"'"+contextMsg+", value is '"+pathString+"'");
+ rd.addField(pathAttributeName,pathString);
+ }
+ }
- if (ingestProtocol != null)
- {
- // Use HTTP to fetch document!
- String ingestHttpAddress = convertToIngestURI(documentIdentifier);
- if (ingestHttpAddress != null)
- {
+ if (ingestProtocol != null)
+ {
+ // Use HTTP to fetch document!
+ String ingestHttpAddress = convertToIngestURI(documentIdentifier);
+ if (ingestHttpAddress != null)
+ {
- // Set up connection
- HttpClient client = getInitializedClient(contextMsg);
+ // Set up connection
+ HttpClient client = getInitializedClient(contextMsg);
- long currentTime;
+ long currentTime;
- if (Logging.connectors.isInfoEnabled())
- Logging.connectors.info("Livelink: " + ingestHttpAddress);
+ if (Logging.connectors.isInfoEnabled())
+ Logging.connectors.info("Livelink: " + ingestHttpAddress);
- HttpGet method = new HttpGet(getHost().toURI() + ingestHttpAddress);
- method.setHeader(new BasicHeader("Accept","*/*"));
+ HttpGet method = new HttpGet(getHost().toURI() + ingestHttpAddress);
+ method.setHeader(new BasicHeader("Accept","*/*"));
- ExecuteMethodThread methodThread = new ExecuteMethodThread(client,method);
- methodThread.start();
+ ExecuteMethodThread methodThread = new ExecuteMethodThread(client,method);
+ methodThread.start();
+ try
+ {
+
+ int statusCode = methodThread.getResponseCode();
+ switch (statusCode)
+ {
+ case 500:
+ case 502:
+ Logging.connectors.warn("Livelink: Service interruption during fetch "+contextMsg+" with Livelink HTTP Server, retrying...");
+ throw new ServiceInterruption("Service interruption during fetch",new ManifoldCFException(Integer.toString(statusCode)+" error while fetching"),System.currentTimeMillis()+60000L,
+ System.currentTimeMillis()+600000L,-1,true);
+
+ case HttpStatus.SC_UNAUTHORIZED:
+ Logging.connectors.warn("Livelink: Document fetch unauthorized for "+ingestHttpAddress+" ("+contextMsg+")");
+ // Since we logged in, we should fail here if the ingestion user doesn't have access to
+ // the document, but if we do, don't fail hard.
+ resultCode = "UNAUTHORIZED";
+ activities.noDocument(documentIdentifier,version);
+ return;
+
+ case HttpStatus.SC_OK:
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("Livelink: Created http document connection to Livelink "+contextMsg);
+ // A non-existent content length will cause a value of -1 to be returned. This seems to indicate that the session login did not work right.
+ if (methodThread.getResponseContentLength() >= 0)
+ {
try
{
-
- int statusCode = methodThread.getResponseCode();
- switch (statusCode)
+ InputStream is = methodThread.getSafeInputStream();
+ try
{
- case 500:
- case 502:
- Logging.connectors.warn("Livelink: Service interruption during fetch "+contextMsg+" with Livelink HTTP Server, retrying...");
- throw new ServiceInterruption("Service interruption during fetch",new ManifoldCFException(Integer.toString(statusCode)+" error while fetching"),System.currentTimeMillis()+60000L,
- System.currentTimeMillis()+600000L,-1,true);
-
- case HttpStatus.SC_UNAUTHORIZED:
- Logging.connectors.warn("Livelink: Document fetch unauthorized for "+ingestHttpAddress+" ("+contextMsg+")");
- // Since we logged in, we should fail here if the ingestion user doesn't have access to the
- // the document, but if we do, don't fail hard.
- resultCode = "UNAUTHORIZED";
- activities.noDocument(documentIdentifier,version);
- return;
-
- case HttpStatus.SC_OK:
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("Livelink: Created http document connection to Livelink "+contextMsg);
- // A non-existent content length will cause a value of -1 to be returned. This seems to indicate that the session login did not work right.
- if (methodThread.getResponseContentLength() >= 0)
- {
- try
- {
- InputStream is = methodThread.getSafeInputStream();
- try
- {
- rd.setBinary(is,dataSize);
+ rd.setBinary(is,dataSize);
- activities.ingestDocumentWithException(documentIdentifier,version,viewHttpAddress,rd);
-
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("Livelink: Ingesting done "+contextMsg);
+ activities.ingestDocumentWithException(documentIdentifier,version,viewHttpAddress,rd);
+
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("Livelink: Ingesting done "+contextMsg);
- }
- finally
- {
- // Close stream via thread, since otherwise this can hang
- is.close();
- }
- }
- catch (java.net.SocketTimeoutException e)
- {
- resultCode = "DATATIMEOUT";
- resultDescription = e.getMessage();
- currentTime = System.currentTimeMillis();
- Logging.connectors.warn("Livelink: Livelink socket timed out ingesting from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
- throw new ServiceInterruption("Socket timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
- }
- catch (java.net.SocketException e)
- {
- resultCode = "DATASOCKETERROR";
- resultDescription = e.getMessage();
- currentTime = System.currentTimeMillis();
- Logging.connectors.warn("Livelink: Livelink socket error ingesting from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
- throw new ServiceInterruption("Socket error: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
- }
- catch (javax.net.ssl.SSLHandshakeException e)
- {
- resultCode = "DATASSLHANDSHAKEERROR";
- resultDescription = e.getMessage();
- currentTime = System.currentTimeMillis();
- Logging.connectors.warn("Livelink: SSL handshake failed authenticating "+contextMsg+": "+e.getMessage(),e);
- throw new ServiceInterruption("SSL handshake error: "+e.getMessage(),e,currentTime+60000L,currentTime+300000L,-1,true);
- }
- catch (ConnectTimeoutException e)
- {
- resultCode = "CONNECTTIMEOUT";
- resultDescription = e.getMessage();
- currentTime = System.currentTimeMillis();
- Logging.connectors.warn("Livelink: Livelink socket timed out connecting to the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
- throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
- }
- catch (InterruptedException e)
- {
- wasInterrupted = true;
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
- }
- catch (InterruptedIOException e)
- {
- wasInterrupted = true;
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
- }
- catch (HttpException e)
- {
- resultCode = "HTTPEXCEPTION";
- resultDescription = e.getMessage();
- // Treat unknown error ingesting data as a transient condition
- currentTime = System.currentTimeMillis();
- Logging.connectors.warn("Livelink: HTTP exception ingesting "+contextMsg+": "+e.getMessage(),e);
- throw new ServiceInterruption("HTTP exception ingesting "+contextMsg+": "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
- }
- catch (IOException e)
- {
- resultCode = "DATAEXCEPTION";
- resultDescription = e.getMessage();
- // Treat unknown error ingesting data as a transient condition
- currentTime = System.currentTimeMillis();
- Logging.connectors.warn("Livelink: IO exception ingesting "+contextMsg+": "+e.getMessage(),e);
- throw new ServiceInterruption("IO exception ingesting "+contextMsg+": "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
- }
- readSize = dataSize;
- }
- else
- {
- resultCode = "SESSIONLOGINFAILED";
- activities.noDocument(documentIdentifier,version);
- }
- break;
- case HttpStatus.SC_BAD_REQUEST:
- case HttpStatus.SC_USE_PROXY:
- case HttpStatus.SC_GONE:
- resultCode = "ERROR "+Integer.toString(statusCode);
- throw new ManifoldCFException("Unrecoverable request failure; error = "+Integer.toString(statusCode));
- default:
- resultCode = "UNKNOWN";
- Logging.connectors.warn("Livelink: Attempt to retrieve document from '"+ingestHttpAddress+"' received a response of "+Integer.toString(statusCode)+"; retrying in one minute");
- currentTime = System.currentTimeMillis();
- throw new ServiceInterruption("Fetch failed; retrying in 1 minute",new ManifoldCFException("Fetch failed with unknown code "+Integer.toString(statusCode)),
- currentTime+60000L,currentTime+600000L,-1,true);
}
- }
- catch (InterruptedException e)
- {
- // Drop the connection on the floor
- methodThread.interrupt();
- methodThread = null;
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ finally
+ {
+ // Close stream via thread, since otherwise this can hang
+ is.close();
+ }
}
catch (java.net.SocketTimeoutException e)
{
- Logging.connectors.warn("Livelink: Socket timed out reading from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
- resultCode = "TIMEOUT";
+ resultCode = "DATATIMEOUT";
resultDescription = e.getMessage();
currentTime = System.currentTimeMillis();
- throw new ServiceInterruption("Socket timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+ Logging.connectors.warn("Livelink: Livelink socket timed out ingesting from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+ throw new ServiceInterruption("Socket timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
}
catch (java.net.SocketException e)
{
- Logging.connectors.warn("Livelink: Socket error reading from Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
- resultCode = "SOCKETERROR";
+ resultCode = "DATASOCKETERROR";
resultDescription = e.getMessage();
currentTime = System.currentTimeMillis();
- throw new ServiceInterruption("Socket error: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+ Logging.connectors.warn("Livelink: Livelink socket error ingesting from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+ throw new ServiceInterruption("Socket error: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
}
catch (javax.net.ssl.SSLHandshakeException e)
{
- currentTime = System.currentTimeMillis();
- Logging.connectors.warn("Livelink: SSL handshake failed "+contextMsg+": "+e.getMessage(),e);
- resultCode = "SSLHANDSHAKEERROR";
+ resultCode = "DATASSLHANDSHAKEERROR";
resultDescription = e.getMessage();
+ currentTime = System.currentTimeMillis();
+ Logging.connectors.warn("Livelink: SSL handshake failed authenticating "+contextMsg+": "+e.getMessage(),e);
throw new ServiceInterruption("SSL handshake error: "+e.getMessage(),e,currentTime+60000L,currentTime+300000L,-1,true);
}
catch (ConnectTimeoutException e)
{
- Logging.connectors.warn("Livelink: Connect timed out reading from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
resultCode = "CONNECTTIMEOUT";
resultDescription = e.getMessage();
currentTime = System.currentTimeMillis();
- throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+ Logging.connectors.warn("Livelink: Livelink socket timed out connecting to the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+ throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
+ }
+ catch (InterruptedException e)
+ {
+ wasInterrupted = true;
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
catch (InterruptedIOException e)
{
- methodThread.interrupt();
+ wasInterrupted = true;
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
catch (HttpException e)
{
- resultCode = "EXCEPTION";
+ resultCode = "HTTPEXCEPTION";
resultDescription = e.getMessage();
- throw new ManifoldCFException("Exception getting response "+contextMsg+": "+e.getMessage(), e);
+ // Treat unknown error ingesting data as a transient condition
+ currentTime = System.currentTimeMillis();
+ Logging.connectors.warn("Livelink: HTTP exception ingesting "+contextMsg+": "+e.getMessage(),e);
+ throw new ServiceInterruption("HTTP exception ingesting "+contextMsg+": "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
}
catch (IOException e)
{
- resultCode = "EXCEPTION";
+ resultCode = "DATAEXCEPTION";
resultDescription = e.getMessage();
- throw new ManifoldCFException("Exception getting response "+contextMsg+": "+e.getMessage(), e);
- }
- finally
- {
- if (methodThread != null)
- {
- methodThread.abort();
- if (!wasInterrupted)
- {
- try
- {
- methodThread.finishUp();
- }
- catch (InterruptedException e)
- {
- wasInterrupted = true;
- throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
- }
- }
- }
+ // Treat unknown error ingesting data as a transient condition
+ currentTime = System.currentTimeMillis();
+ Logging.connectors.warn("Livelink: IO exception ingesting "+contextMsg+": "+e.getMessage(),e);
+ throw new ServiceInterruption("IO exception ingesting "+contextMsg+": "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
}
+ readSize = dataSize;
}
else
{
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("Livelink: No fetch URI "+contextMsg+" - not ingesting");
- resultCode = "NOURI";
- return;
+ resultCode = "SESSIONLOGINFAILED";
+ activities.noDocument(documentIdentifier,version);
}
+ break;
+ case HttpStatus.SC_BAD_REQUEST:
+ case HttpStatus.SC_USE_PROXY:
+ case HttpStatus.SC_GONE:
+ resultCode = "ERROR "+Integer.toString(statusCode);
+ throw new ManifoldCFException("Unrecoverable request failure; error = "+Integer.toString(statusCode));
+ default:
+ resultCode = "UNKNOWN";
+ Logging.connectors.warn("Livelink: Attempt to retrieve document from '"+ingestHttpAddress+"' received a response of "+Integer.toString(statusCode)+"; retrying in one minute");
+ currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("Fetch failed; retrying in 1 minute",new ManifoldCFException("Fetch failed with unknown code "+Integer.toString(statusCode)),
+ currentTime+60000L,currentTime+600000L,-1,true);
}
- else
+ }
+ catch (InterruptedException e)
+ {
+ // Drop the connection on the floor
+ methodThread.interrupt();
+ methodThread = null;
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ }
+ catch (java.net.SocketTimeoutException e)
+ {
+ Logging.connectors.warn("Livelink: Socket timed out reading from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+ resultCode = "TIMEOUT";
+ resultDescription = e.getMessage();
+ currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("Socket timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+ }
+ catch (java.net.SocketException e)
+ {
+ Logging.connectors.warn("Livelink: Socket error reading from Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+ resultCode = "SOCKETERROR";
+ resultDescription = e.getMessage();
+ currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("Socket error: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+ }
+ catch (javax.net.ssl.SSLHandshakeException e)
+ {
+ currentTime = System.currentTimeMillis();
+ Logging.connectors.warn("Livelink: SSL handshake failed "+contextMsg+": "+e.getMessage(),e);
+ resultCode = "SSLHANDSHAKEERROR";
+ resultDescription = e.getMessage();
+ throw new ServiceInterruption("SSL handshake error: "+e.getMessage(),e,currentTime+60000L,currentTime+300000L,-1,true);
+ }
+ catch (ConnectTimeoutException e)
+ {
+ Logging.connectors.warn("Livelink: Connect timed out reading from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+ resultCode = "CONNECTTIMEOUT";
+ resultDescription = e.getMessage();
+ currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+ }
+ catch (InterruptedIOException e)
+ {
+ methodThread.interrupt();
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ }
+ catch (HttpException e)
+ {
+ resultCode = "EXCEPTION";
+ resultDescription = e.getMessage();
+ throw new ManifoldCFException("Exception getting response "+contextMsg+": "+e.getMessage(), e);
+ }
+ catch (IOException e)
+ {
+ resultCode = "EXCEPTION";
+ resultDescription = e.getMessage();
+ throw new ManifoldCFException("Exception getting response "+contextMsg+": "+e.getMessage(), e);
+ }
+ finally
+ {
+ if (methodThread != null)
{
- // Use FetchVersion instead
- long currentTime;
-
- // Fire up the document reading thread
- DocumentReadingThread t = new DocumentReadingThread(vol,objID,0);
- try
+ methodThread.abort();
+ if (!wasInterrupted)
{
- t.start();
try
{
- InputStream is = t.getSafeInputStream();
- try
- {
- // Can only index while background thread is running!
- rd.setBinary(is, dataSize);
- activities.ingestDocumentWithException(documentIdentifier, version, viewHttpAddress, rd);
- }
- finally
- {
- is.close();
- }
- }
- catch (ManifoldCFException e)
- {
- if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
- wasInterrupted = true;
- throw e;
+ methodThread.finishUp();
}
- catch (java.net.SocketTimeoutException e)
- {
- throw e;
- }
- catch (InterruptedIOException e)
+ catch (InterruptedException e)
{
wasInterrupted = true;
- throw e;
- }
- finally
- {
- if (!wasInterrupted)
- t.finishUp();
- }
-
- // No errors. Record the fact that we made it.
- resultCode = "OK";
- readSize = dataSize;
- }
- catch (InterruptedException e)
- {
- t.interrupt();
- throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
- ManifoldCFException.INTERRUPTED);
- }
- catch (ConnectTimeoutException e)
- {
- Logging.connectors.warn("Livelink: Connect timed out "+contextMsg+": "+e.getMessage(), e);
- resultCode = "CONNECTTIMEOUT";
- resultDescription = e.getMessage();
- currentTime = System.currentTimeMillis();
- throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
- }
- catch (InterruptedIOException e)
- {
- t.interrupt();
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
- }
- catch (IOException e)
- {
- resultCode = "EXCEPTION";
- resultDescription = e.getMessage();
- throw new ManifoldCFException("Exception getting response "+contextMsg+": "+e.getMessage(), e);
- }
- catch (ManifoldCFException e)
- {
- if (e.getErrorCode() != ManifoldCFException.INTERRUPTED)
- {
- resultCode = "EXCEPTION";
- resultDescription = e.getMessage();
+ throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
- throw e;
- }
- catch (RuntimeException e)
- {
- resultCode = "EXCEPTION";
- resultDescription = e.getMessage();
- handleLivelinkRuntimeException(e,0,true);
}
}
}
- else
- {
- // Document not indexable because of its length
- resultDescription = "Document length ("+dataSize+") was rejected by output connector";
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its length ("+dataSize+") was rejected by output connector");
- resultCode = "DOCUMENTTOOLONG";
- activities.noDocument(documentIdentifier,version);
- }
}
else
{
- // Document not indexable because of its mime type
- resultDescription = "Mime type ("+mimeType+") was rejected by output connector";
if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its mime type ("+mimeType+") was rejected by output connector");
- resultCode = "MIMETYPEEXCLUSION";
- activities.noDocument(documentIdentifier,version);
+ Logging.connectors.debug("Livelink: No fetch URI "+contextMsg+" - not ingesting");
+ resultCode = "NOURI";
+ return;
}
}
else
{
- // Document not ingestable due to URL
- resultDescription = "URL ("+viewHttpAddress+") was rejected by output connector";
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its URL ("+viewHttpAddress+") was rejected by output connector");
- resultCode = "URLEXCLUSION";
- activities.noDocument(documentIdentifier,version);
+ // Use FetchVersion instead
+ long currentTime;
+
+ // Fire up the document reading thread
+ DocumentReadingThread t = new DocumentReadingThread(vol,objID,0);
+ try
+ {
+ t.start();
+ try
+ {
+ InputStream is = t.getSafeInputStream();
+ try
+ {
+ // Can only index while background thread is running!
+ rd.setBinary(is, dataSize);
+ activities.ingestDocumentWithException(documentIdentifier, version, viewHttpAddress, rd);
+ }
+ finally
+ {
+ is.close();
+ }
+ }
+ catch (ManifoldCFException e)
+ {
+ if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+ wasInterrupted = true;
+ throw e;
+ }
+ catch (java.net.SocketTimeoutException e)
+ {
+ throw e;
+ }
+ catch (InterruptedIOException e)
+ {
+ wasInterrupted = true;
+ throw e;
+ }
+ finally
+ {
+ if (!wasInterrupted)
+ t.finishUp();
+ }
+
+ // No errors. Record the fact that we made it.
+ resultCode = "OK";
+ readSize = dataSize;
+ }
+ catch (InterruptedException e)
+ {
+ t.interrupt();
+ throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+ ManifoldCFException.INTERRUPTED);
+ }
+ catch (ConnectTimeoutException e)
+ {
+ Logging.connectors.warn("Livelink: Connect timed out "+contextMsg+": "+e.getMessage(), e);
+ resultCode = "CONNECTTIMEOUT";
+ resultDescription = e.getMessage();
+ currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+ }
+ catch (InterruptedIOException e)
+ {
+ t.interrupt();
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ }
+ catch (IOException e)
+ {
+ resultCode = "EXCEPTION";
+ resultDescription = e.getMessage();
+ throw new ManifoldCFException("Exception getting response "+contextMsg+": "+e.getMessage(), e);
+ }
+ catch (ManifoldCFException e)
+ {
+ if (e.getErrorCode() != ManifoldCFException.INTERRUPTED)
+ {
+ resultCode = "EXCEPTION";
+ resultDescription = e.getMessage();
+ }
+ throw e;
+ }
+ catch (RuntimeException e)
+ {
+ resultCode = "EXCEPTION";
+ resultDescription = e.getMessage();
+ handleLivelinkRuntimeException(e,0,true);
+ }
}
}
finally
Propchange: manifoldcf/trunk/connectors/sharepoint/
------------------------------------------------------------------------------
Merged /manifoldcf/branches/CONNECTORS-1067/connectors/sharepoint:r1630049-1630186
Modified: manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java (original)
+++ manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java Wed Oct 8 17:54:47 2014
@@ -1634,226 +1634,229 @@ public class SharePointRepository extend
throws ManifoldCFException, ServiceInterruption
{
// Before we fetch, confirm that the output connector will accept the document
- if (activities.checkURLIndexable(fileUrl))
+ if (!activities.checkURLIndexable(fileUrl))
{
- // Also check mime type
- String contentType = mapExtensionToMimeType(documentIdentifier);
- if (activities.checkMimeTypeIndexable(contentType))
+ // URL failed
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says URL '"+fileUrl+"' is not indexable");
+ return false;
+ }
+
+ // Also check mime type
+ String contentType = mapExtensionToMimeType(documentIdentifier);
+ if (!activities.checkMimeTypeIndexable(contentType))
+ {
+ // Mime type failed
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says mime type '"+((contentType==null)?"null":contentType)+"' is not indexable");
+ return false;
+ }
+
+ // Now check date stamp
+ if (!activities.checkDateIndexable(modifiedDate))
+ {
+ // Date failed
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says date '"+((modifiedDate==null)?"null":modifiedDate)+"' is not indexable");
+ return false;
+ }
+
+ // Set stuff up for fetch activity logging
+ long startFetchTime = System.currentTimeMillis();
+ try
+ {
+ // Read the document into a local temporary file, so I get a reliable length.
+ File tempFile = File.createTempFile("__shp__",".tmp");
+ try
{
- // Set stuff up for fetch activity logging
- long startFetchTime = System.currentTimeMillis();
+ // Open the output stream
+ OutputStream os = new FileOutputStream(tempFile);
try
{
- // Read the document into a local temporary file, so I get a reliable length.
- File tempFile = File.createTempFile("__shp__",".tmp");
+ // Catch all exceptions having to do with reading the document
try
{
- // Open the output stream
- OutputStream os = new FileOutputStream(tempFile);
- try
- {
- // Catch all exceptions having to do with reading the document
- try
- {
- ExecuteMethodThread emt = new ExecuteMethodThread(httpClient, fetchUrl, os);
- emt.start();
- int returnCode = emt.finishUp();
+ ExecuteMethodThread emt = new ExecuteMethodThread(httpClient, fetchUrl, os);
+ emt.start();
+ int returnCode = emt.finishUp();
- if (returnCode == 404 || returnCode == 401 || returnCode == 400 || returnCode == 415)
- {
- // Well, sharepoint thought the document was there, but it really isn't, so delete it.
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("SharePoint: Document at '"+fileUrl+"' failed to fetch with code "+Integer.toString(returnCode)+", deleting");
- activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
- null,documentIdentifier,"Not found",Integer.toString(returnCode),null);
- return false;
- }
- else if (returnCode != 200)
- {
- activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
- null,documentIdentifier,"Error","Http status "+Integer.toString(returnCode),null);
- throw new ManifoldCFException("Error fetching document '"+fileUrl+"': "+Integer.toString(returnCode));
- }
-
- // Log the normal fetch activity
- activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
- new Long(tempFile.length()),documentIdentifier,"Success",null,null);
-
- }
- catch (InterruptedException e)
- {
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
- }
- catch (java.net.SocketTimeoutException e)
- {
- activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
- new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
- Logging.connectors.warn("SharePoint: SocketTimeoutException thrown: "+e.getMessage(),e);
- long currentTime = System.currentTimeMillis();
- throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
- currentTime + 12 * 60 * 60000L,-1,true);
- }
- catch (org.apache.http.conn.ConnectTimeoutException e)
- {
- activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
- new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
- Logging.connectors.warn("SharePoint: ConnectTimeoutException thrown: "+e.getMessage(),e);
- long currentTime = System.currentTimeMillis();
- throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
- currentTime + 12 * 60 * 60000L,-1,true);
- }
- catch (InterruptedIOException e)
- {
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
- }
- catch (IllegalArgumentException e)
- {
- Logging.connectors.error("SharePoint: Illegal argument", e);
- activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
- new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
- throw new ManifoldCFException("SharePoint: Illegal argument: "+e.getMessage(),e);
- }
- catch (org.apache.http.HttpException e)
- {
- Logging.connectors.warn("SharePoint: HttpException thrown",e);
- activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
- new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
- long currentTime = System.currentTimeMillis();
- throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
- currentTime + 12 * 60 * 60000L,-1,true);
- }
- catch (IOException e)
- {
- activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
- new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
- Logging.connectors.warn("SharePoint: IOException thrown: "+e.getMessage(),e);
- long currentTime = System.currentTimeMillis();
- throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
- currentTime + 12 * 60 * 60000L,-1,true);
- }
- }
- finally
+ if (returnCode == 404 || returnCode == 401 || returnCode == 400 || returnCode == 415)
{
- os.close();
+ // Well, sharepoint thought the document was there, but it really isn't, so delete it.
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("SharePoint: Document at '"+fileUrl+"' failed to fetch with code "+Integer.toString(returnCode)+", deleting");
+ activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+ null,documentIdentifier,"Not found",Integer.toString(returnCode),null);
+ return false;
}
-
- // Ingest the document
- long documentLength = tempFile.length();
- if (activities.checkLengthIndexable(documentLength))
+ else if (returnCode != 200)
{
- InputStream is = new FileInputStream(tempFile);
- try
- {
- RepositoryDocument data = new RepositoryDocument();
- data.setBinary( is, documentLength );
-
- data.setFileName(mapToFileName(documentIdentifier));
-
- if (contentType != null)
- data.setMimeType(contentType);
-
- setDataACLs(data,accessTokens,denyTokens);
-
- setPathAttribute(data,sDesc,documentIdentifier);
-
- if (modifiedDate != null)
- data.setModifiedDate(modifiedDate);
- if (createdDate != null)
- data.setCreatedDate(createdDate);
+ activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+ null,documentIdentifier,"Error","Http status "+Integer.toString(returnCode),null);
+ throw new ManifoldCFException("Error fetching document '"+fileUrl+"': "+Integer.toString(returnCode));
+ }
- if (metadataValues != null)
- {
- Iterator<String> iter = metadataValues.keySet().iterator();
- while (iter.hasNext())
- {
- String fieldName = iter.next();
- String fieldData = metadataValues.get(fieldName);
- data.addField(fieldName,fieldData);
- }
- }
- data.addField("GUID",guid);
+ // Log the normal fetch activity
+ activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+ new Long(tempFile.length()),documentIdentifier,"Success",null,null);
- try
- {
- activities.ingestDocumentWithException( documentIdentifier, version, fileUrl , data );
- }
- catch (IOException e)
- {
- handleIOException(e,"reading document");
- }
- return true;
- }
- finally
- {
- try
- {
- is.close();
- }
- catch (java.net.SocketTimeoutException e)
- {
- // This is not fatal
- Logging.connectors.debug("SharePoint: Timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
- }
- catch (org.apache.http.conn.ConnectTimeoutException e)
- {
- // This is not fatal
- Logging.connectors.debug("SharePoint: Connect timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
- }
- catch (InterruptedIOException e)
- {
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
- }
- catch (IOException e)
- {
- // This is not fatal
- Logging.connectors.debug("SharePoint: Server closed connection before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
- }
- }
- }
- else
- {
- // Document too long
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("SharePoint: Document '"+documentIdentifier+"' was too long, according to output connector");
- return false;
- }
}
- finally
+ catch (InterruptedException e)
+ {
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ }
+ catch (java.net.SocketTimeoutException e)
+ {
+ activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+ new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+ Logging.connectors.warn("SharePoint: SocketTimeoutException thrown: "+e.getMessage(),e);
+ long currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
+ currentTime + 12 * 60 * 60000L,-1,true);
+ }
+ catch (org.apache.http.conn.ConnectTimeoutException e)
+ {
+ activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+ new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+ Logging.connectors.warn("SharePoint: ConnectTimeoutException thrown: "+e.getMessage(),e);
+ long currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
+ currentTime + 12 * 60 * 60000L,-1,true);
+ }
+ catch (InterruptedIOException e)
+ {
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ }
+ catch (IllegalArgumentException e)
+ {
+ Logging.connectors.error("SharePoint: Illegal argument", e);
+ activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+ new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+ throw new ManifoldCFException("SharePoint: Illegal argument: "+e.getMessage(),e);
+ }
+ catch (org.apache.http.HttpException e)
+ {
+ Logging.connectors.warn("SharePoint: HttpException thrown",e);
+ activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+ new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+ long currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
+ currentTime + 12 * 60 * 60000L,-1,true);
+ }
+ catch (IOException e)
{
- tempFile.delete();
+ activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+ new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+ Logging.connectors.warn("SharePoint: IOException thrown: "+e.getMessage(),e);
+ long currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
+ currentTime + 12 * 60 * 60000L,-1,true);
}
}
- catch (java.net.SocketTimeoutException e)
+ finally
{
- throw new ManifoldCFException("Socket timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+ os.close();
}
- catch (org.apache.http.conn.ConnectTimeoutException e)
+
+ // Ingest the document
+ long documentLength = tempFile.length();
+ if (!activities.checkLengthIndexable(documentLength))
{
- throw new ManifoldCFException("Connect timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+ // Document too long
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("SharePoint: Document '"+documentIdentifier+"' was too long, according to output connector");
+ return false;
}
- catch (InterruptedIOException e)
+
+ InputStream is = new FileInputStream(tempFile);
+ try
{
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ RepositoryDocument data = new RepositoryDocument();
+ data.setBinary( is, documentLength );
+
+ data.setFileName(mapToFileName(documentIdentifier));
+
+ if (contentType != null)
+ data.setMimeType(contentType);
+
+ setDataACLs(data,accessTokens,denyTokens);
+
+ setPathAttribute(data,sDesc,documentIdentifier);
+
+ if (modifiedDate != null)
+ data.setModifiedDate(modifiedDate);
+ if (createdDate != null)
+ data.setCreatedDate(createdDate);
+
+ if (metadataValues != null)
+ {
+ Iterator<String> iter = metadataValues.keySet().iterator();
+ while (iter.hasNext())
+ {
+ String fieldName = iter.next();
+ String fieldData = metadataValues.get(fieldName);
+ data.addField(fieldName,fieldData);
+ }
+ }
+ data.addField("GUID",guid);
+
+ try
+ {
+ activities.ingestDocumentWithException( documentIdentifier, version, fileUrl , data );
+ }
+ catch (IOException e)
+ {
+ handleIOException(e,"reading document");
+ }
+ return true;
}
- catch (IOException e)
+ finally
{
- throw new ManifoldCFException("IO error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+ try
+ {
+ is.close();
+ }
+ catch (java.net.SocketTimeoutException e)
+ {
+ // This is not fatal
+ Logging.connectors.debug("SharePoint: Timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
+ }
+ catch (org.apache.http.conn.ConnectTimeoutException e)
+ {
+ // This is not fatal
+ Logging.connectors.debug("SharePoint: Connect timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
+ }
+ catch (InterruptedIOException e)
+ {
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ }
+ catch (IOException e)
+ {
+ // This is not fatal
+ Logging.connectors.debug("SharePoint: Server closed connection before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
+ }
}
}
- else
+ finally
{
- // Mime type failed
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says mime type '"+((contentType==null)?"null":contentType)+"' is not indexable");
- return false;
+ tempFile.delete();
}
}
- else
+ catch (java.net.SocketTimeoutException e)
{
- // URL failed
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says URL '"+fileUrl+"' is not indexable");
- return false;
+ throw new ManifoldCFException("Socket timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+ }
+ catch (org.apache.http.conn.ConnectTimeoutException e)
+ {
+ throw new ManifoldCFException("Connect timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+ }
+ catch (InterruptedIOException e)
+ {
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ }
+ catch (IOException e)
+ {
+ throw new ManifoldCFException("IO error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
}
}
Propchange: manifoldcf/trunk/connectors/wiki/
------------------------------------------------------------------------------
Merged /manifoldcf/branches/CONNECTORS-1067/connectors/wiki:r1630049-1630186
Modified: manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java (original)
+++ manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java Wed Oct 8 17:54:47 2014
@@ -3866,13 +3866,39 @@ public class WikiConnector extends org.a
String comment = t.getComment();
String title = t.getTitle();
String lastModified = t.getLastModified();
+ Date modifiedDate = (lastModified==null)?null:DateParser.parseISO8601Date(lastModified);
+ String contentType = "text/plain";
+ dataSize = contentFile.length();
+
+ if (!activities.checkURLIndexable(fullURL))
+ {
+ activities.noDocument(documentIdentifier,documentVersion);
+ return;
+ }
+
+ if (!activities.checkLengthIndexable(dataSize))
+ {
+ activities.noDocument(documentIdentifier,documentVersion);
+ return;
+ }
+
+ if (!activities.checkMimeTypeIndexable(contentType))
+ {
+ activities.noDocument(documentIdentifier,documentVersion);
+ return;
+ }
+
+ if (!activities.checkDateIndexable(modifiedDate))
+ {
+ activities.noDocument(documentIdentifier,documentVersion);
+ return;
+ }
RepositoryDocument rd = new RepositoryDocument();
// For wiki, type is always text/plain
- rd.setMimeType("text/plain");
+ rd.setMimeType(contentType);
- dataSize = contentFile.length();
InputStream is = new FileInputStream(contentFile);
try
{
@@ -3886,7 +3912,7 @@ public class WikiConnector extends org.a
if (lastModified != null)
{
rd.addField("last-modified",lastModified);
- rd.setModifiedDate(DateParser.parseISO8601Date(lastModified));
+ rd.setModifiedDate(modifiedDate);
}
if (allowACL != null && allowACL.length > 0) {
Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java Wed Oct 8 17:54:47 2014
@@ -227,6 +227,34 @@ public class IncrementalIngester extends
return pipelineSpecificationBasic.getStageConnectionName(pipelineSpecificationBasic.getOutputStage(0));
}
+ /** Check if a date is indexable.
+ *@param pipelineSpecification is the pipeline specification.
+ *@param date is the date to check.
+ *@param activity are the activities available to this method.
+ *@return true if the date is indexable.
+ */
+ @Override
+ public boolean checkDateIndexable(
+ IPipelineSpecification pipelineSpecification,
+ Date date,
+ IOutputCheckActivity activity)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ PipelineObject pipeline = pipelineGrab(
+ new PipelineConnections(pipelineSpecification));
+ if (pipeline == null)
+ // A connector is not installed; treat this as a service interruption.
+ throw new ServiceInterruption("One or more connectors are not installed",0L);
+ try
+ {
+ return pipeline.checkDateIndexable(date,activity);
+ }
+ finally
+ {
+ pipeline.release();
+ }
+ }
+
/** Check if a mime type is indexable.
*@param pipelineSpecification is the pipeline specification.
*@param mimeType is the mime type to check.
@@ -2485,6 +2513,18 @@ public class IncrementalIngester extends
addActivities.noDocument();
}
+ /** Detect if a date is acceptable downstream or not. This method is used to determine whether it makes sense to fetch a document
+ * in the first place.
+ *@param date is the date of the document.
+ *@return true if the date can be accepted by the downstream connection.
+ */
+ @Override
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return addActivities.checkDateIndexable(date);
+ }
+
/** Detect if a mime type is acceptable downstream or not. This method is used to determine whether it makes sense to fetch a document
* in the first place.
*@param mimeType is the mime type of the document.
@@ -2562,7 +2602,14 @@ public class IncrementalIngester extends
this.transformationConnectors = transformationConnectors;
this.outputConnectors = outputConnectors;
}
-
+
+ public boolean checkDateIndexable(Date date, IOutputCheckActivity finalActivity)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ PipelineCheckFanout entryPoint = buildCheckPipeline(finalActivity);
+ return entryPoint.checkDateIndexable(date);
+ }
+
public boolean checkMimeTypeIndexable(String mimeType, IOutputCheckActivity finalActivity)
throws ManifoldCFException, ServiceInterruption
{
@@ -2824,6 +2871,19 @@ public class IncrementalIngester extends
}
@Override
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ // OR all results
+ for (PipelineCheckEntryPoint p : entryPoints)
+ {
+ if (p.checkDateIndexable(date))
+ return true;
+ }
+ return false;
+ }
+
+ @Override
public boolean checkMimeTypeIndexable(String mimeType)
throws ManifoldCFException, ServiceInterruption
{
@@ -2894,6 +2954,12 @@ public class IncrementalIngester extends
this.checkActivity = checkActivity;
}
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return pipelineConnector.checkDateIndexable(pipelineDescriptionString,date,checkActivity);
+ }
+
public boolean checkMimeTypeIndexable(String mimeType)
throws ManifoldCFException, ServiceInterruption
{
@@ -2948,6 +3014,19 @@ public class IncrementalIngester extends
}
@Override
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ // OR all results
+ for (PipelineAddEntryPoint p : entryPoints)
+ {
+ if (p.checkDateIndexable(date))
+ return true;
+ }
+ return false;
+ }
+
+ @Override
public boolean checkMimeTypeIndexable(String mimeType)
throws ManifoldCFException, ServiceInterruption
{
@@ -3137,6 +3216,12 @@ public class IncrementalIngester extends
{
return isActive;
}
+
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return pipelineConnector.checkDateIndexable(pipelineDescriptionString,date,addActivity);
+ }
public boolean checkMimeTypeIndexable(String mimeType)
throws ManifoldCFException, ServiceInterruption
@@ -3782,6 +3867,18 @@ public class IncrementalIngester extends
activities.recordActivity(startTime,activityType,dataSize,entityURI,resultCode,resultDescription);
}
+ /** Detect if a date is acceptable downstream or not. This method is used to determine whether it makes sense to fetch a document
+ * in the first place.
+ *@param date is the date of the document.
+ *@return true if the document described by the date can be accepted by the downstream connection.
+ */
+ @Override
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return activities.checkDateIndexable(date);
+ }
+
/** Detect if a mime type is acceptable downstream or not. This method is used to determine whether it makes sense to fetch a document
* in the first place.
*@param mimeType is the mime type of the document.
Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java Wed Oct 8 17:54:47 2014
@@ -90,6 +90,18 @@ public interface IIncrementalIngester
public VersionContext getTransformationDescription(String transformationConnectionName, Specification spec)
throws ManifoldCFException, ServiceInterruption;
+ /** Check if a document date is indexable.
+ *@param pipelineSpecification is the pipeline specification.
+ *@param date is the date to check
+ *@param activity are the activities available to this method.
+ *@return true if the document with that date is indexable.
+ */
+ public boolean checkDateIndexable(
+ IPipelineSpecification pipelineSpecification,
+ Date date,
+ IOutputCheckActivity activity)
+ throws ManifoldCFException, ServiceInterruption;
+
/** Check if a mime type is indexable.
*@param pipelineSpecification is the pipeline specification.
*@param mimeType is the mime type to check.
Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java Wed Oct 8 17:54:47 2014
@@ -22,6 +22,7 @@ import org.apache.manifoldcf.core.interf
import org.apache.manifoldcf.agents.interfaces.*;
import java.io.*;
+import java.util.*;
/** This interface abstracts from the activities that a transformation connector can do
when checking a document.
@@ -30,6 +31,14 @@ public interface IOutputCheckActivity
{
public static final String _rcsid = "@(#)$Id$";
+ /** Detect if a document date is acceptable downstream or not. This method is used to determine whether it makes sense to fetch a document
+ * in the first place.
+ *@param date is the date of the document.
+ *@return true if the document with that date can be accepted by the downstream connection.
+ */
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption;
+
/** Detect if a mime type is acceptable downstream or not. This method is used to determine whether it makes sense to fetch a document
* in the first place.
*@param mimeType is the mime type of the document.
Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java Wed Oct 8 17:54:47 2014
@@ -57,6 +57,16 @@ public interface IPipelineConnector exte
public VersionContext getPipelineDescription(Specification spec)
throws ManifoldCFException, ServiceInterruption;
+ /** Detect if a document date is acceptable or not. This method is used to determine whether it makes sense to fetch a document
+ * in the first place.
+ *@param pipelineDescription is the document's pipeline version string, for this connection.
+ *@param date is the date of the document.
+ *@param checkActivity is an object including the activities that can be performed by this method.
+ *@return true if the document with that date can be accepted by this connector.
+ */
+ public boolean checkDateIndexable(VersionContext pipelineDescription, Date date, IOutputCheckActivity checkActivity)
+ throws ManifoldCFException, ServiceInterruption;
+
/** Detect if a mime type is acceptable or not. This method is used to determine whether it makes sense to fetch a document
* in the first place.
*@param pipelineDescription is the document's pipeline version string, for this connection.
Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java Wed Oct 8 17:54:47 2014
@@ -81,6 +81,20 @@ public abstract class BaseOutputConnecto
// The base implementation does nothing here.
}
+ /** Detect if a document date is acceptable or not. This method is used to determine whether it makes sense to fetch a document
+ * in the first place.
+ *@param pipelineDescription is the document's pipeline version string, for this connection.
+ *@param date is the date of the document.
+ *@param checkActivity is an object including the activities that can be performed by this method.
+ *@return true if the document with that date can be accepted by this connector.
+ */
+ @Override
+ public boolean checkDateIndexable(VersionContext pipelineDescription, Date date, IOutputCheckActivity checkActivity)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return true;
+ }
+
/** Detect if a mime type is acceptable or not. This method is used to determine whether it makes sense to fetch a document
* in the first place.
*@param pipelineDescription is the document's pipeline version string, for this connection.
Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java Wed Oct 8 17:54:47 2014
@@ -70,6 +70,20 @@ public abstract class BaseTransformation
return false;
}
+ /** Detect if a document date is acceptable or not. This method is used to determine whether it makes sense to fetch a document
+ * in the first place.
+ *@param pipelineDescription is the document's pipeline version string, for this connection.
+ *@param date is the date of the document.
+ *@param checkActivity is an object including the activities that can be performed by this method.
+ *@return true if the document with that date can be accepted by this connector.
+ */
+ @Override
+ public boolean checkDateIndexable(VersionContext pipelineDescription, Date date, IOutputCheckActivity checkActivity)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return checkActivity.checkDateIndexable(date);
+ }
+
/** Detect if a mime type is acceptable or not. This method is used to determine whether it makes sense to fetch a document
* in the first place.
*@param pipelineDescription is the document's pipeline version string, for this connection.
Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IFingerprintActivity.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IFingerprintActivity.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IFingerprintActivity.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IFingerprintActivity.java Wed Oct 8 17:54:47 2014
@@ -21,6 +21,7 @@ package org.apache.manifoldcf.crawler.in
import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.agents.interfaces.*;
import java.io.*;
+import java.util.*;
/** This interface abstracts from the activities that handle document fingerprinting and mime type acceptance.
*/
@@ -28,6 +29,14 @@ public interface IFingerprintActivity
{
public static final String _rcsid = "@(#)$Id: IFingerprintActivity.java 988245 2010-08-23 18:39:35Z kwright $";
+ /** Detect if a date is indexable or not. This method is used by participating repository connectors to pre-filter
+ * unusable documents, reducing the number that will be passed to the output connector.
+ *@param date is the date of the document; may be null.
+ *@return true if a document with that date is indexable.
+ */
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption;
+
/** Detect if a mime type is indexable or not. This method is used by participating repository connectors to pre-filter the number of
* unusable documents that will be passed to this output connector.
*@param mimeType is the mime type of the document.
Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java Wed Oct 8 17:54:47 2014
@@ -1907,6 +1907,19 @@ public class WorkerThread extends Thread
abortSet.add(localIdentifier);
}
+ /** Detect if a date is indexable or not. This method is used by participating repository connectors to pre-filter
+ * unusable documents before they are passed on. The answer is obtained from the ingester for the current pipeline specification.
+ *@param date is the date of the document; may be null
+ *@return true if the ingester reports that a document with that date is indexable.
+ */
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return ingester.checkDateIndexable(
+ pipelineSpecification,date,
+ ingestLogger);
+ }
+
/** Check whether a mime type is indexable by the currently specified output connector.
*@param mimeType is the mime type to check, not including any character set specification.
*@return true if the mime type is indexable.
@@ -2318,6 +2331,18 @@ public class WorkerThread extends Thread
{
}
+ /** Detect if a date is acceptable downstream or not. This method is used to determine whether it makes sense to fetch a document
+ * in the first place. This implementation does not inspect the date and rejects unconditionally.
+ *@param date is the document's date
+ *@return true if the document with that date can be accepted by the downstream connection; this implementation always returns false.
+ */
+ @Override
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return false;
+ }
+
/** Detect if a mime type is acceptable downstream or not. This method is used to determine whether it makes sense to fetch a document
* in the first place.
*@param mimeType is the mime type of the document.