You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/11/05 20:30:37 UTC
svn commit: r1636942 - in /manifoldcf/branches/release-1.7-branch: ./ framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/
Author: kwright
Date: Wed Nov 5 19:30:37 2014
New Revision: 1636942
URL: http://svn.apache.org/r1636942
Log:
Pull up changes for CONNECTORS-1094 from dev_1x branch
Modified:
manifoldcf/branches/release-1.7-branch/ (props changed)
manifoldcf/branches/release-1.7-branch/CHANGES.txt
manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java
manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PriorityCalculator.java
Propchange: manifoldcf/branches/release-1.7-branch/
------------------------------------------------------------------------------
Merged /manifoldcf/branches/dev_1x:r1636941
Merged /manifoldcf/trunk:r1636940
Modified: manifoldcf/branches/release-1.7-branch/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-1.7-branch/CHANGES.txt?rev=1636942&r1=1636941&r2=1636942&view=diff
==============================================================================
--- manifoldcf/branches/release-1.7-branch/CHANGES.txt (original)
+++ manifoldcf/branches/release-1.7-branch/CHANGES.txt Wed Nov 5 19:30:37 2014
@@ -4,6 +4,10 @@ $Id$
======================= Release 1.7.2 =====================
+CONNECTORS-1094: Performance improvements for document
+reprioritization.
+(Aeham Abushwashi, Karl Wright)
+
CONNECTORS-1093: Need to preload document priorities for reset
of all priorities, for performance.
(Karl Wright)
Modified: manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java?rev=1636942&r1=1636941&r2=1636942&view=diff
==============================================================================
--- manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java (original)
+++ manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java Wed Nov 5 19:30:37 2014
@@ -1077,8 +1077,11 @@ public class ManifoldCF extends org.apac
IPriorityCalculator[] priorities = new IPriorityCalculator[descs.length];
- // Go through the documents and calculate the priorities
rt.clearPreloadRequests();
+
+ // Compute the list of connector instances we will need.
+ // This has a side effect of fetching all job descriptions too.
+ Set<String> connectionNames = new HashSet<String>();
for (int i = 0; i < descs.length; i++)
{
DocumentDescription dd = descs[i];
@@ -1088,33 +1091,60 @@ public class ManifoldCF extends org.apac
job = jobManager.load(dd.getJobID(),true);
jobDescriptionMap.put(dd.getJobID(),job);
}
- String connectionName = job.getConnectionName();
+ connectionNames.add(job.getConnectionName());
+ }
+ String[] orderingKeys = new String[connectionNames.size()];
+ IRepositoryConnection[] connections = new IRepositoryConnection[connectionNames.size()];
+ int z = 0;
+ for (String connectionName : connectionNames)
+ {
+ orderingKeys[z] = connectionName;
IRepositoryConnection connection = connectionMap.get(connectionName);
if (connection == null)
{
connection = mgr.load(connectionName);
connectionMap.put(connectionName,connection);
}
+ connections[z] = connection;
+ z++;
+ }
- String[] binNames;
- // Grab a connector handle
- IRepositoryConnector connector = repositoryConnectorPool.grab(connection);
- try
+ // Now, grab the connector instances we need
+ IRepositoryConnector[] connectors = repositoryConnectorPool.grabMultiple(orderingKeys,connections);
+ try
+ {
+ // Map from connection name to connector instance
+ Map<String,IRepositoryConnector> connectorMap = new HashMap<String,IRepositoryConnector>();
+ for (z = 0; z < orderingKeys.length; z++)
{
+ connectorMap.put(orderingKeys[z],connectors[z]);
+ }
+ // Go through the documents and calculate the priorities
+ double minimumDepth = rt.getMinimumDepth();
+ for (int i = 0; i < descs.length; i++)
+ {
+ DocumentDescription dd = descs[i];
+ IJobDescription job = jobDescriptionMap.get(dd.getJobID());
+ String connectionName = job.getConnectionName();
+ IRepositoryConnector connector = connectorMap.get(connectionName);
+ IRepositoryConnection connection = connectionMap.get(connectionName);
+ String[] binNames;
if (connector == null)
binNames = new String[]{""};
else
// Get the bins for the document identifier
binNames = connector.getBinNames(descs[i].getDocumentIdentifier());
+ PriorityCalculator p = new PriorityCalculator(rt,minimumDepth,connection,binNames);
+ priorities[i] = p;
+ p.makePreloadRequest();
}
- finally
- {
- repositoryConnectorPool.release(connection,connector);
- }
- PriorityCalculator p = new PriorityCalculator(rt,connection,binNames);
- priorities[i] = p;
- p.makePreloadRequest();
}
+ finally
+ {
+ // Release all the connector instances we grabbed
+ repositoryConnectorPool.releaseMultiple(connections,connectors);
+ }
+
rt.preloadBinValues();
// Now, write all the priorities we can.
Modified: manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PriorityCalculator.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PriorityCalculator.java?rev=1636942&r1=1636941&r2=1636942&view=diff
==============================================================================
--- manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PriorityCalculator.java (original)
+++ manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PriorityCalculator.java Wed Nov 5 19:30:37 2014
@@ -48,6 +48,12 @@ public class PriorityCalculator implemen
public PriorityCalculator(IReprioritizationTracker rt, IRepositoryConnection connection, String[] documentBins)
throws ManifoldCFException
{
+ this(rt,rt.getMinimumDepth(),connection,documentBins);
+ }
+
+ public PriorityCalculator(IReprioritizationTracker rt, double currentMinimumDepth, IRepositoryConnection connection, String[] documentBins)
+ throws ManifoldCFException
+ {
this.connection = connection;
this.binNames = documentBins;
this.rt = rt;
@@ -86,8 +92,6 @@ public class PriorityCalculator implemen
double[] maxFetchRates = calculateMaxFetchRates(binNames,connection);
// Before calculating priority, calculate some factors that will allow us to determine the proper starting value for a bin.
- double currentMinimumDepth = rt.getMinimumDepth();
-
// First thing to do is to reset the bin values based on the current minimum.
for (int i = 0; i < binNames.length; i++)
{