You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/11/05 20:30:37 UTC

svn commit: r1636942 - in /manifoldcf/branches/release-1.7-branch: ./ framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/

Author: kwright
Date: Wed Nov  5 19:30:37 2014
New Revision: 1636942

URL: http://svn.apache.org/r1636942
Log:
Pull up changes for CONNECTORS-1094 (performance improvements for document reprioritization) from dev_1x branch

Modified:
    manifoldcf/branches/release-1.7-branch/   (props changed)
    manifoldcf/branches/release-1.7-branch/CHANGES.txt
    manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java
    manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PriorityCalculator.java

Propchange: manifoldcf/branches/release-1.7-branch/
------------------------------------------------------------------------------
  Merged /manifoldcf/branches/dev_1x:r1636941
  Merged /manifoldcf/trunk:r1636940

Modified: manifoldcf/branches/release-1.7-branch/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-1.7-branch/CHANGES.txt?rev=1636942&r1=1636941&r2=1636942&view=diff
==============================================================================
--- manifoldcf/branches/release-1.7-branch/CHANGES.txt (original)
+++ manifoldcf/branches/release-1.7-branch/CHANGES.txt Wed Nov  5 19:30:37 2014
@@ -4,6 +4,10 @@ $Id$
 
 ======================= Release 1.7.2 =====================
 
+CONNECTORS-1094: Performance improvements for document
+reprioritization.
+(Aeham Abushwashi, Karl Wright)
+
 CONNECTORS-1093: Need to preload document priorities for reset
 of all priorities, for performance.
 (Karl Wright)

Modified: manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java?rev=1636942&r1=1636941&r2=1636942&view=diff
==============================================================================
--- manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java (original)
+++ manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java Wed Nov  5 19:30:37 2014
@@ -1077,8 +1077,11 @@ public class ManifoldCF extends org.apac
 
     IPriorityCalculator[] priorities = new IPriorityCalculator[descs.length];
 
-    // Go through the documents and calculate the priorities
     rt.clearPreloadRequests();
+    
+    // Compute the set of connection names whose connector instances we will need.
+    // As a side effect, this also loads and caches every job description referenced by the documents.
+    Set<String> connectionNames = new HashSet<String>();
     for (int i = 0; i < descs.length; i++)
     {
       DocumentDescription dd = descs[i];
@@ -1088,33 +1091,60 @@ public class ManifoldCF extends org.apac
         job = jobManager.load(dd.getJobID(),true);
         jobDescriptionMap.put(dd.getJobID(),job);
       }
-      String connectionName = job.getConnectionName();
+      connectionNames.add(job.getConnectionName());
+    }
+    String[] orderingKeys = new String[connectionNames.size()];
+    IRepositoryConnection[] connections = new IRepositoryConnection[connectionNames.size()];
+    int z = 0;
+    for (String connectionName : connectionNames)
+    {
+      orderingKeys[z] = connectionName;
       IRepositoryConnection connection = connectionMap.get(connectionName);
       if (connection == null)
       {
         connection = mgr.load(connectionName);
         connectionMap.put(connectionName,connection);
       }
+      connections[z] = connection;
+      z++;
+    }
 
-      String[] binNames;
-      // Grab a connector handle
-      IRepositoryConnector connector = repositoryConnectorPool.grab(connection);
-      try
+    // Now, grab the connector instances we need
+    IRepositoryConnector[] connectors = repositoryConnectorPool.grabMultiple(orderingKeys,connections);
+    try
+    {
+      // Map from connection name to connector instance
+      Map<String,IRepositoryConnector> connectorMap = new HashMap<String,IRepositoryConnector>();
+      for (z = 0; z < orderingKeys.length; z++)
       {
+        connectorMap.put(orderingKeys[z],connectors[z]);
+      }
+      // Go through the documents and calculate the priorities
+      double minimumDepth = rt.getMinimumDepth();
+      for (int i = 0; i < descs.length; i++)
+      {
+        DocumentDescription dd = descs[i];
+        IJobDescription job = jobDescriptionMap.get(dd.getJobID());
+        String connectionName = job.getConnectionName();
+        IRepositoryConnector connector = connectorMap.get(connectionName);
+        IRepositoryConnection connection = connectionMap.get(connectionName);
+        String[] binNames;
         if (connector == null)
           binNames = new String[]{""};
         else
           // Get the bins for the document identifier
           binNames = connector.getBinNames(descs[i].getDocumentIdentifier());
+        PriorityCalculator p = new PriorityCalculator(rt,minimumDepth,connection,binNames);
+        priorities[i] = p;
+        p.makePreloadRequest();
       }
-      finally
-      {
-        repositoryConnectorPool.release(connection,connector);
-      }
-      PriorityCalculator p = new PriorityCalculator(rt,connection,binNames);
-      priorities[i] = p;
-      p.makePreloadRequest();
     }
+    finally
+    {
+      // Release all the connector instances we grabbed
+      repositoryConnectorPool.releaseMultiple(connections,connectors);
+    }
+    
     rt.preloadBinValues();
     
     // Now, write all the priorities we can.

Modified: manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PriorityCalculator.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PriorityCalculator.java?rev=1636942&r1=1636941&r2=1636942&view=diff
==============================================================================
--- manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PriorityCalculator.java (original)
+++ manifoldcf/branches/release-1.7-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PriorityCalculator.java Wed Nov  5 19:30:37 2014
@@ -48,6 +48,12 @@ public class PriorityCalculator implemen
   public PriorityCalculator(IReprioritizationTracker rt, IRepositoryConnection connection, String[] documentBins)
     throws ManifoldCFException
   {
+    this(rt,rt.getMinimumDepth(),connection,documentBins);
+  }
+  
+  public PriorityCalculator(IReprioritizationTracker rt, double currentMinimumDepth, IRepositoryConnection connection, String[] documentBins)
+    throws ManifoldCFException
+  {
     this.connection = connection;
     this.binNames = documentBins;
     this.rt = rt;
@@ -86,8 +92,6 @@ public class PriorityCalculator implemen
     double[] maxFetchRates = calculateMaxFetchRates(binNames,connection);
 
     // Before calculating priority, calculate some factors that will allow us to determine the proper starting value for a bin.
-    double currentMinimumDepth = rt.getMinimumDepth();
-
     // First thing to do is to reset the bin values based on the current minimum.
     for (int i = 0; i < binNames.length; i++)
     {