You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/08/14 03:12:13 UTC
svn commit: r1513705 - in /manifoldcf/trunk: ./
framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/
Author: kwright
Date: Wed Aug 14 01:12:13 2013
New Revision: 1513705
URL: http://svn.apache.org/r1513705
Log:
Fix for CONNECTORS-764.
Modified:
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Jobs.java
Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1513705&r1=1513704&r2=1513705&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Wed Aug 14 01:12:13 2013
@@ -3,6 +3,10 @@ $Id$
======================= 1.4-dev =====================
+CONNECTORS-764: HOPCOUNTREMOVED records need to be reset when
+a job's hopcount limits change. It also makes sense to reset them
+when the set of documents is changed.
+
CONNECTORS-750: Skip files when catching FileNotFoundException,
e.g. access/permission denied files in FileConnector.
(Shinichiro Abe)
Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java?rev=1513705&r1=1513704&r2=1513705&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java Wed Aug 14 01:12:13 2013
@@ -5456,6 +5456,7 @@ public class JobManager implements IJobM
boolean requestMinimum)
throws ManifoldCFException
{
+
// (1) If the connector has MODEL_ADD_CHANGE_DELETE, then
// we let the connector run the show; there's no purge phase, and therefore the
// documents are left in a COMPLETED state if they don't show up in the list
@@ -5474,7 +5475,7 @@ public class JobManager implements IJobM
if (connectorModel == IRepositoryConnector.MODEL_ADD_CHANGE_DELETE)
{
if (fromBeginningOfTime)
- jobQueue.queueAllExisting(jobID);
+ queueAllExisting(jobID,legalLinkTypes);
return;
}
@@ -5483,7 +5484,7 @@ public class JobManager implements IJobM
if (connectorModel == IRepositoryConnector.MODEL_CHAINED_ADD_CHANGE_DELETE)
{
if (fromBeginningOfTime)
- jobQueue.queueAllExisting(jobID);
+ queueAllExisting(jobID,legalLinkTypes);
else
jobQueue.preparePartialScan(jobID);
return;
@@ -5507,6 +5508,58 @@ public class JobManager implements IJobM
jobQueue.prepareIncrementalScan(jobID);
}
+ /** Queue all existing.
+ * Within a single database transaction, calls
+ * jobQueue.reactivateHopcountRemovedRecords(jobID) when the job has at least one
+ * legal link type, and then jobQueue.queueAllExisting(jobID). If the transaction
+ * fails with DATABASE_TRANSACTION_ABORT, rollback is signalled and the whole
+ * sequence is retried after sleeping for getRandomAmount(); any other
+ * ManifoldCFException, or any Error, is rethrown once rollback has been signalled.
+ *@param jobID is the job id.
+ *@param legalLinkTypes are the link types allowed for the job. It is dereferenced
+ * without a null check, so callers must not pass null.
+ *@throws ManifoldCFException on any failure other than a transaction abort.
+ */
+ protected void queueAllExisting(Long jobID, String[] legalLinkTypes)
+ throws ManifoldCFException
+ {
+ while (true) // Loop until the transaction commits or a non-retryable failure is thrown.
+ {
+ long sleepAmt = 0L; // Stays 0 unless this attempt is aborted and must be retried.
+ database.beginTransaction();
+ try
+ {
+ if (legalLinkTypes.length > 0) // With no link types the reactivation step is skipped.
+ {
+ jobQueue.reactivateHopcountRemovedRecords(jobID);
+ }
+
+ jobQueue.queueAllExisting(jobID);
+ TrackerClass.notePrecommit();
+ database.performCommit();
+ TrackerClass.noteCommit();
+ break; // Committed; leave the retry loop (the finally block still runs).
+ }
+ catch (ManifoldCFException e)
+ {
+ database.signalRollback();
+ TrackerClass.noteRollback();
+ if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) // Only transaction aborts are retried.
+ {
+ if (Logging.perf.isDebugEnabled())
+ Logging.perf.debug("Aborted transaction during queueAllExisting: "+e.getMessage());
+ sleepAmt = getRandomAmount(); // The sleep itself happens in the finally block, after endTransaction().
+ continue;
+ }
+ throw e;
+ }
+ catch (Error e)
+ {
+ database.signalRollback();
+ TrackerClass.noteRollback();
+ throw e;
+ }
+ finally
+ {
+ database.endTransaction(); // Runs on every exit path: commit, retry, or rethrow.
+ sleepFor(sleepAmt); // sleepAmt is 0 on every path except the retry path.
+ }
+ }
+
+ }
+
/** Prepare for a full scan.
*@param jobID is the job id.
*@param legalLinkTypes are the link types allowed for the job.
Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java?rev=1513705&r1=1513704&r2=1513705&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java Wed Aug 14 01:12:13 2013
@@ -562,7 +562,7 @@ public class JobQueue extends org.apache
public void prepareFullScan(Long jobID)
throws ManifoldCFException
{
- // Delete PENDING entries
+ // Delete PENDING and HOPCOUNTREMOVED entries (they are treated the same)
ArrayList list = new ArrayList();
list.add(jobID);
list.add(statusToString(STATUS_PENDING));
@@ -571,7 +571,9 @@ public class JobQueue extends org.apache
list.clear();
String query = buildConjunctionClause(list,new ClauseDescription[]{
new UnitaryClause(jobIDField,jobID),
- new UnitaryClause(statusField,statusToString(STATUS_PENDING))});
+ new MultiClause(statusField,new Object[]{
+ statusToString(STATUS_PENDING),
+ statusToString(STATUS_HOPCOUNTREMOVED)})});
performDelete("WHERE "+query,list,null);
// Turn PENDINGPURGATORY and COMPLETED into PURGATORY.
@@ -626,7 +628,7 @@ public class JobQueue extends org.apache
noteModifications(0,1,0);
// Do an analyze, otherwise our plans are going to be crap right off the bat
unconditionallyAnalyzeTables();
- }
+ }
/** Prepare for a "partial" job. This is called ONLY when the job is inactive.
*
Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Jobs.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Jobs.java?rev=1513705&r1=1513704&r2=1513705&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Jobs.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Jobs.java Wed Aug 14 01:12:13 2013
@@ -752,6 +752,33 @@ public class Jobs extends org.apache.man
isSame = false;
}
+ if (isSame)
+ {
+ // Compare hopcount filter criteria.
+ Map filterRows = hopFilterManager.readRows(id);
+ Map newFilterRows = jobDescription.getHopCountFilters();
+ if (filterRows.size() != newFilterRows.size())
+ isSame = false;
+ else
+ {
+ for (String linkType : (Collection<String>)filterRows.keySet())
+ {
+ Integer oldCount = (Integer)filterRows.get(linkType);
+ Integer newCount = (Integer)newFilterRows.get(linkType);
+ if (oldCount == null || newCount == null)
+ {
+ isSame = false;
+ break;
+ }
+ else if (oldCount.intValue() != newCount.intValue())
+ {
+ isSame = false;
+ break;
+ }
+ }
+ }
+ }
+
if (!isSame)
values.put(lastCheckTimeField,null);