You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2011/04/25 00:53:00 UTC
svn commit: r1096386 - in /incubator/lcf/trunk: ./
connectors/gts/connector/src/main/java/org/apache/manifoldcf/agents/output/gts/
connectors/nulloutput/connector/src/main/java/org/apache/manifoldcf/agents/output/nullconnector/
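connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/
framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/
framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/
framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/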
Author: kwright
Date: Sun Apr 24 22:52:59 2011
New Revision: 1096386
URL: http://svn.apache.org/viewvc?rev=1096386&view=rev
Log:
Fix for CONNECTORS-186. Refactor WorkerThread so that it can deal with a service interruption from the getOutputDescription() method.
Modified:
incubator/lcf/trunk/CHANGES.txt
incubator/lcf/trunk/connectors/gts/connector/src/main/java/org/apache/manifoldcf/agents/output/gts/GTSConnector.java
incubator/lcf/trunk/connectors/nulloutput/connector/src/main/java/org/apache/manifoldcf/agents/output/nullconnector/NullConnector.java
incubator/lcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java
incubator/lcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
incubator/lcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
incubator/lcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputConnector.java
incubator/lcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
Modified: incubator/lcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/CHANGES.txt?rev=1096386&r1=1096385&r2=1096386&view=diff
==============================================================================
--- incubator/lcf/trunk/CHANGES.txt (original)
+++ incubator/lcf/trunk/CHANGES.txt Sun Apr 24 22:52:59 2011
@@ -6,6 +6,11 @@ CONNECTORS-185: Clarify the build-and-de
it is clear that the configfile define is needed for the application server.
(Mark Moloney, Karl Wright)
+CONNECTORS-186: Refactor WorkerThread code to permit all output
+connector methods to throw a ServiceInterruption, including the
+getOutputDescription() method.
+(Karl Wright)
+
CONNECTORS-183: Add a text field to make Active Directory authority
configuration more flexible with regards to protocol.
(Shinichiro Abe, Karl Wright)
Modified: incubator/lcf/trunk/connectors/gts/connector/src/main/java/org/apache/manifoldcf/agents/output/gts/GTSConnector.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/connectors/gts/connector/src/main/java/org/apache/manifoldcf/agents/output/gts/GTSConnector.java?rev=1096386&r1=1096385&r2=1096386&view=diff
==============================================================================
--- incubator/lcf/trunk/connectors/gts/connector/src/main/java/org/apache/manifoldcf/agents/output/gts/GTSConnector.java (original)
+++ incubator/lcf/trunk/connectors/gts/connector/src/main/java/org/apache/manifoldcf/agents/output/gts/GTSConnector.java Sun Apr 24 22:52:59 2011
@@ -242,7 +242,7 @@ public class GTSConnector extends org.ap
* the document will not need to be sent again to the output data store.
*/
public String getOutputDescription(OutputSpecification spec)
- throws ManifoldCFException
+ throws ManifoldCFException, ServiceInterruption
{
// The information we want in this string is:
// (1) the collection name(s), in sorted order.
Modified: incubator/lcf/trunk/connectors/nulloutput/connector/src/main/java/org/apache/manifoldcf/agents/output/nullconnector/NullConnector.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/connectors/nulloutput/connector/src/main/java/org/apache/manifoldcf/agents/output/nullconnector/NullConnector.java?rev=1096386&r1=1096385&r2=1096386&view=diff
==============================================================================
--- incubator/lcf/trunk/connectors/nulloutput/connector/src/main/java/org/apache/manifoldcf/agents/output/nullconnector/NullConnector.java (original)
+++ incubator/lcf/trunk/connectors/nulloutput/connector/src/main/java/org/apache/manifoldcf/agents/output/nullconnector/NullConnector.java Sun Apr 24 22:52:59 2011
@@ -113,7 +113,7 @@ public class NullConnector extends org.a
* the document will not need to be sent again to the output data store.
*/
public String getOutputDescription(OutputSpecification spec)
- throws ManifoldCFException
+ throws ManifoldCFException, ServiceInterruption
{
return "";
}
Modified: incubator/lcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java?rev=1096386&r1=1096385&r2=1096386&view=diff
==============================================================================
--- incubator/lcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java (original)
+++ incubator/lcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java Sun Apr 24 22:52:59 2011
@@ -197,7 +197,7 @@ public class SolrConnector extends org.a
* the document will not need to be sent again to the output data store.
*/
public String getOutputDescription(OutputSpecification spec)
- throws ManifoldCFException
+ throws ManifoldCFException, ServiceInterruption
{
StringBuffer sb = new StringBuffer();
Modified: incubator/lcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java?rev=1096386&r1=1096385&r2=1096386&view=diff
==============================================================================
--- incubator/lcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java (original)
+++ incubator/lcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java Sun Apr 24 22:52:59 2011
@@ -188,7 +188,7 @@ public class IncrementalIngester extends
IOutputConnector connector = OutputConnectorFactory.grab(threadContext,connection.getClassName(),connection.getConfigParams(),connection.getMaxConnections());
if (connector == null)
// The connector is not installed; treat this as a service interruption.
- throw new ServiceInterruption("Output connector not installed",300000L);
+ throw new ServiceInterruption("Output connector not installed",0L);
try
{
return connector.checkMimeTypeIndexable(mimeType);
@@ -211,7 +211,7 @@ public class IncrementalIngester extends
IOutputConnector connector = OutputConnectorFactory.grab(threadContext,connection.getClassName(),connection.getConfigParams(),connection.getMaxConnections());
if (connector == null)
// The connector is not installed; treat this as a service interruption.
- throw new ServiceInterruption("Output connector not installed",300000L);
+ throw new ServiceInterruption("Output connector not installed",0L);
try
{
return connector.checkDocumentIndexable(localFile);
@@ -221,7 +221,31 @@ public class IncrementalIngester extends
OutputConnectorFactory.release(connector);
}
}
-
+
+ /** Get an output version string for a document.
+ *@param outputConnectionName is the name of the output connection associated with this action.
+ *@param spec is the output specification.
+ *@return the description string.
+ */
+ public String getOutputDescription(String outputConnectionName, OutputSpecification spec)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ IOutputConnection connection = connectionManager.load(outputConnectionName);
+ IOutputConnector connector = OutputConnectorFactory.grab(threadContext,connection.getClassName(),connection.getConfigParams(),connection.getMaxConnections());
+ if (connector == null)
+ // The connector is not installed; treat this as a service interruption.
+ throw new ServiceInterruption("Output connector not installed",0L);
+ try
+ {
+ return connector.getOutputDescription(spec);
+ }
+ finally
+ {
+ OutputConnectorFactory.release(connector);
+ }
+
+ }
+
/** Record a document version, but don't ingest it.
* The purpose of this method is to keep track of the frequency at which ingestion "attempts" take place.
* ServiceInterruption is thrown if this action must be rescheduled.
@@ -1403,7 +1427,7 @@ public class IncrementalIngester extends
IOutputConnector connector = OutputConnectorFactory.grab(threadContext,connection.getClassName(),connection.getConfigParams(),connection.getMaxConnections());
if (connector == null)
// The connector is not installed; treat this as a service interruption.
- throw new ServiceInterruption("Output connector not installed",300000L);
+ throw new ServiceInterruption("Output connector not installed",0L);
try
{
return connector.addOrReplaceDocument(documentURI,outputDescription,document,authorityNameString,activities);
@@ -1422,7 +1446,7 @@ public class IncrementalIngester extends
IOutputConnector connector = OutputConnectorFactory.grab(threadContext,connection.getClassName(),connection.getConfigParams(),connection.getMaxConnections());
if (connector == null)
// The connector is not installed; treat this as a service interruption.
- throw new ServiceInterruption("Output connector not installed",300000L);
+ throw new ServiceInterruption("Output connector not installed",0L);
try
{
connector.removeDocument(documentURI,outputDescription,activities);
Modified: incubator/lcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java?rev=1096386&r1=1096385&r2=1096386&view=diff
==============================================================================
--- incubator/lcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java (original)
+++ incubator/lcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java Sun Apr 24 22:52:59 2011
@@ -74,6 +74,14 @@ public interface IIncrementalIngester
public boolean checkDocumentIndexable(String outputConnectionName, File localFile)
throws ManifoldCFException, ServiceInterruption;
+ /** Get an output version string for a document.
+ *@param outputConnectionName is the name of the output connection associated with this action.
+ *@param spec is the output specification.
+ *@return the description string.
+ */
+ public String getOutputDescription(String outputConnectionName, OutputSpecification spec)
+ throws ManifoldCFException, ServiceInterruption;
+
/** Record a document version, but don't ingest it.
* The purpose of this method is to keep track of the frequency at which ingestion "attempts" take place.
* ServiceInterruption is thrown if this action must be rescheduled.
Modified: incubator/lcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputConnector.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputConnector.java?rev=1096386&r1=1096385&r2=1096386&view=diff
==============================================================================
--- incubator/lcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputConnector.java (original)
+++ incubator/lcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputConnector.java Sun Apr 24 22:52:59 2011
@@ -102,7 +102,7 @@ public interface IOutputConnector extend
* the document will not need to be sent again to the output data store.
*/
public String getOutputDescription(OutputSpecification spec)
- throws ManifoldCFException;
+ throws ManifoldCFException, ServiceInterruption;
/** Add (or replace) a document in the output data store using the connector.
* This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
Modified: incubator/lcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java?rev=1096386&r1=1096385&r2=1096386&view=diff
==============================================================================
--- incubator/lcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java (original)
+++ incubator/lcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java Sun Apr 24 22:52:59 2011
@@ -144,8 +144,7 @@ public class WorkerThread extends Thread
int jobType = job.getType();
IRepositoryConnection connection = qds.getConnection();
- IOutputConnection outputConnection = outputMgr.load(outputName);
-
+
OutputActivity ingestLogger = new OutputActivity(connectionName,connMgr,outputName);
// Put together document id's into an array, and versions into a map
@@ -271,29 +270,6 @@ public class WorkerThread extends Thread
// This try is so that we can process errors from getting a connection specially
try
{
- // All documents in this batch will have the same output version string, so calculate that string up front.
- String outputVersion = null;
- IOutputConnector outputConnector = OutputConnectorFactory.grab(threadContext,
- outputConnection.getClassName(),
- outputConnection.getConfigParams(),
- outputConnection.getMaxConnections());
- // If we wind up with a null here, it means that a document got queued for an output connector which is now gone.
- // Basically, what we want to do in that case is to treat this kind of like a service interruption - the document
- // must be requeued for immediate reprocessing. When the rest of the world figures out that the job that owns this
- // document is in fact unable to function, we'll stop getting such documents handed to us, because the state of the
- // job will be changed.
- if (outputConnector != null)
- {
- try
- {
- outputVersion = outputConnector.getOutputDescription(outputSpec);
- }
- finally
- {
- OutputConnectorFactory.release(outputConnector);
- }
- }
-
// Grab a connector handle
IRepositoryConnector connector = RepositoryConnectorFactory.grab(threadContext,
connection.getClassName(),
@@ -305,7 +281,7 @@ public class WorkerThread extends Thread
// must be requeued for immediate reprocessing. When the rest of the world figures out that the job that owns this
// document is in fact unable to function, we'll stop getting such documents handed to us, because the state of the
// job will be changed.
- if (connector == null || outputConnector == null)
+ if (connector == null)
{
i = 0;
while (i < qds.getCount())
@@ -326,6 +302,9 @@ public class WorkerThread extends Thread
if (Thread.currentThread().isInterrupted())
throw new ManifoldCFException("Interrupted",ManifoldCFException.INTERRUPTED);
+ // Get the output version string.
+ String outputVersion = ingester.getOutputDescription(outputName,outputSpec);
+
HashMap abortSet = new HashMap();
ProcessActivity activity;
VersionActivity versionActivity = new VersionActivity(connectionName,connMgr,jobManager,job,ingester,abortSet);