You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/12/17 20:37:37 UTC

svn commit: r1646318 - in /manifoldcf/branches/release-2.0-branch: ./ framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/ framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ site/src/documentation/content...

Author: kwright
Date: Wed Dec 17 19:37:36 2014
New Revision: 1646318

URL: http://svn.apache.org/r1646318
Log:
Pull up fix for CONNECTORS-1123 from trunk.

Modified:
    manifoldcf/branches/release-2.0-branch/   (props changed)
    manifoldcf/branches/release-2.0-branch/CHANGES.txt
    manifoldcf/branches/release-2.0-branch/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
    manifoldcf/branches/release-2.0-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobResetThread.java
    manifoldcf/branches/release-2.0-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java
    manifoldcf/branches/release-2.0-branch/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml

Propchange: manifoldcf/branches/release-2.0-branch/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Wed Dec 17 19:37:36 2014
@@ -119,4 +119,4 @@
 /manifoldcf/branches/CONNECTORS-981:1605049-1605773
 /manifoldcf/branches/CONNECTORS-989:1611600-1612101
 /manifoldcf/branches/CONNECTORS-990:1610284-1610707
-/manifoldcf/trunk:1644252,1644399,1644538,1644920,1644931
+/manifoldcf/trunk:1644252,1644399,1644538,1644920,1644931,1646317

Modified: manifoldcf/branches/release-2.0-branch/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-2.0-branch/CHANGES.txt?rev=1646318&r1=1646317&r2=1646318&view=diff
==============================================================================
--- manifoldcf/branches/release-2.0-branch/CHANGES.txt (original)
+++ manifoldcf/branches/release-2.0-branch/CHANGES.txt Wed Dec 17 19:37:36 2014
@@ -3,6 +3,10 @@ $Id$
 
 ======================= Release 2.0 =====================
 
+CONNECTORS-1123: Reduce the maximum number of zookeeper lock
+nodes, by hashing the lock names that correspond to documents.
+(Aeham Abushwashi, Karl Wright)
+
 CONNECTORS-1121: Plugins have moved in the dist repository, so
 point at them properly.
 (Kamil Żyta, Karl Wright)

Modified: manifoldcf/branches/release-2.0-branch/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-2.0-branch/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java?rev=1646318&r1=1646317&r2=1646318&view=diff
==============================================================================
--- manifoldcf/branches/release-2.0-branch/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java (original)
+++ manifoldcf/branches/release-2.0-branch/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java Wed Dec 17 19:37:36 2014
@@ -963,6 +963,15 @@ public class IncrementalIngester extends
     }
   }
 
+  protected static String createURILockName(String outputConnectionName, String uriHash)
+  {
+    // Collapse (connection name, URI hash) into one of at most 65536 lock names (16-bit mask
+    // below) so the number of zookeeper lock nodes stays bounded.  See CONNECTORS-1123.
+    int hashCode = outputConnectionName.hashCode() + uriHash.hashCode();
+    hashCode &= 0xffff;
+    return "URILOCK-"+hashCode;
+  }
+  
   /** Delete multiple documents from the search engine index.
   *@param pipelineConnections is the pipeline specification.
   *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
@@ -1018,7 +1027,7 @@ public class IncrementalIngester extends
         {
           validURIArray[validURIcount] = uri.getURI();
           validURIHashArray[validURIcount] = uri.getURIHash();
-          lockArray[validURIcount] = outputConnectionName+":"+validURIHashArray[validURIcount];
+          lockArray[validURIcount] = createURILockName(outputConnectionName,validURIHashArray[validURIcount]);
           validURIcount++;
         }
       }
@@ -1233,7 +1242,7 @@ public class IncrementalIngester extends
         {
           validURIArray[validURIcount] = uri.getURI();
           validURIHashArray[validURIcount] = uri.getURIHash();
-          lockArray[validURIcount] = outputConnectionName+":"+validURIHashArray[validURIcount];
+          lockArray[validURIcount] = createURILockName(outputConnectionName,validURIHashArray[validURIcount]);
           validURIcount++;
         }
       }
@@ -3407,19 +3416,19 @@ public class IncrementalIngester extends
     }
   }
 
-  protected static String[] computeLockArray(String documentURI, String oldURI, String outputConnectionName)
+  protected static String[] computeLockArray(String documentURIHash, String oldURIHash, String outputConnectionName)
   {
     int uriCount = 0;
-    if (documentURI != null)
+    if (documentURIHash != null)
       uriCount++;
-    if (oldURI != null && (documentURI == null || !documentURI.equals(oldURI)))
+    if (oldURIHash != null && (documentURIHash == null || !documentURIHash.equals(oldURIHash)))
       uriCount++;
     String[] lockArray = new String[uriCount];
     uriCount = 0;
-    if (documentURI != null)
-      lockArray[uriCount++] = outputConnectionName+":"+documentURI;
-    if (oldURI != null && (documentURI == null || !documentURI.equals(oldURI)))
-      lockArray[uriCount++] = outputConnectionName+":"+oldURI;
+    if (documentURIHash != null)
+      lockArray[uriCount++] = createURILockName(outputConnectionName,documentURIHash);
+    if (oldURIHash != null && (documentURIHash == null || !documentURIHash.equals(oldURIHash)))
+      lockArray[uriCount++] = createURILockName(outputConnectionName,oldURIHash);
     return lockArray;
   }
   

Modified: manifoldcf/branches/release-2.0-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobResetThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-2.0-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobResetThread.java?rev=1646318&r1=1646317&r2=1646318&view=diff
==============================================================================
--- manifoldcf/branches/release-2.0-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobResetThread.java (original)
+++ manifoldcf/branches/release-2.0-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobResetThread.java Wed Dec 17 19:37:36 2014
@@ -102,7 +102,7 @@ public class JobResetThread extends Thre
           // not predicted by the algorithm that assigned those priorities.  This is, of course, quite expensive,
           // but it cannot be helped (at least, I cannot find a way to avoid it).
           //
-          if (jobStops.size() > 0 || jobResumes.size() > 0)
+          if (jobStops.size() > 0 /*|| jobResumes.size() > 0 */)
           {
             Logging.threads.debug("Job reset thread reprioritizing documents...");
 

Modified: manifoldcf/branches/release-2.0-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-2.0-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java?rev=1646318&r1=1646317&r2=1646318&view=diff
==============================================================================
--- manifoldcf/branches/release-2.0-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java (original)
+++ manifoldcf/branches/release-2.0-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java Wed Dec 17 19:37:36 2014
@@ -139,7 +139,7 @@ public class StartupThread extends Threa
                     model,jobType == IJobDescription.TYPE_CONTINUOUS,lastSeedingVersion == null,
                     requestMinimum);
                   
-                  ManifoldCF.resetAllDocumentPriorities(threadContext,processID);
+                  //ManifoldCF.resetAllDocumentPriorities(threadContext,processID);
 
                   if (Logging.threads.isDebugEnabled())
                     Logging.threads.debug("Prepared job "+jobID.toString()+" for execution.");

Modified: manifoldcf/branches/release-2.0-branch/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-2.0-branch/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml?rev=1646318&r1=1646317&r2=1646318&view=diff
==============================================================================
--- manifoldcf/branches/release-2.0-branch/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml (original)
+++ manifoldcf/branches/release-2.0-branch/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml Wed Dec 17 19:37:36 2014
@@ -1058,6 +1058,12 @@ curl -XGET http://localhost:9200/index/_
                 <p>Note that in this example, the Active Directory connection is not responding, which is leading to an error status message instead of "Connection working".</p>
             </section>
 
+            <section id="alfrescowebscriptauthority">
+                <title>Alfresco Webscript Authority Connection</title>
+                <p>The Alfresco Webscript authority connection type helps secure documents indexed using the Alfresco Webscript repository connection type.</p>
+                <p>RAFA: Please fill out</p>
+            </section>
+            
             <section id="cmisauthority">
               <title>CMIS Authority Connection</title>
               <p>A CMIS authority connection is required for enforcing security for documents retrieved from CMIS repositories.</p>
@@ -1462,6 +1468,14 @@ curl -XGET http://localhost:9200/index/_
               <br/><br/>
             </section>
 
+            <section id="alfrescowebscriptrepository">
+              <title>Alfresco Webscript Repository Connection</title>
+              <p>The Alfresco Webscript Repository connection type allows you to index content from an Alfresco repository.  It also supports document
+                    security, in conjunction with the Alfresco Webscript Authority connection type.</p>
+              <p>This connector is compatible with any Alfresco version (???).  RAFA: Please fill this out.</p>
+
+            </section>
+            
             <section id="cmisrepository">
               <title>CMIS Repository Connection</title>
               <p>The CMIS Repository Connection type allows you to index content from any CMIS-compliant repository.</p>