You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/07/21 16:14:53 UTC

svn commit: r1612276 - /manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java

Author: kwright
Date: Mon Jul 21 14:14:53 2014
New Revision: 1612276

URL: http://svn.apache.org/r1612276
Log:
removeDocument wasn't quite right before; worked more like deleteDocument

Modified:
    manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java

Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java?rev=1612276&r1=1612275&r2=1612276&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java Mon Jul 21 14:14:53 2014
@@ -42,6 +42,7 @@ import java.io.*;
 * <tr><td>id</td><td>BIGINT</td><td>Primary Key</td></tr>
 * <tr><td>connectionname</td><td>VARCHAR(32)</td><td>Reference:outputconnections.connectionname</td></tr>
 * <tr><td>dockey</td><td>VARCHAR(73)</td><td></td></tr>
+* <tr><td>componenthash</td><td>VARCHAR(40)</td><td></td></tr>
 * <tr><td>docuri</td><td>LONGTEXT</td><td></td></tr>
 * <tr><td>urihash</td><td>VARCHAR(40)</td><td></td></tr>
 * <tr><td>lastversion</td><td>LONGTEXT</td><td></td></tr>
@@ -1351,12 +1352,12 @@ public class IncrementalIngester extends
           iter = docIdValues.iterator();
           j = 0;
           List<String> list2 = new ArrayList<String>();
-          maxClauses = maxClausesRowIdsForDocIds(outputConnectionName);
+          maxClauses = maxClausesRowIdsForDocIds(outputConnectionName,componentHash);
           while (iter.hasNext())
           {
             if (j == maxClauses)
             {
-              findRowIdsForDocIds(outputConnectionName,rowIDSet,list2);
+              findRowIdsForDocIds(outputConnectionName,rowIDSet,list2,componentHash);
               list2.clear();
               j = 0;
             }
@@ -1365,7 +1366,7 @@ public class IncrementalIngester extends
           }
 
           if (j > 0)
-            findRowIdsForDocIds(outputConnectionName,rowIDSet,list2);
+            findRowIdsForDocIds(outputConnectionName,rowIDSet,list2,componentHash);
 
           // Next, go through the list of row IDs, and delete them in chunks
           j = 0;
@@ -1455,6 +1456,15 @@ public class IncrementalIngester extends
       new UnitaryClause(outputConnNameField,outputConnectionName)});
   }
 
+  /** Calculate the maximum number of doc ids we should use in one lookup when the lookup is
+  * also constrained by a single output connection name and a single component hash. */
+  protected int maxClausesRowIdsForDocIds(String outputConnectionName, String componentHash)
+  {
+    return findConjunctionClauseMax(new ClauseDescription[]{
+      new UnitaryClause(outputConnNameField,outputConnectionName),
+      new UnitaryClause(componentHashField,componentHash)});
+  }
+
   /** Calculate the maximum number of doc ids we should use.
   */
   protected int maxClausesRowIdsForDocIds(String[] outputConnectionNames)
@@ -1488,6 +1498,29 @@ public class IncrementalIngester extends
   /** Given values and parameters corresponding to a set of hash values, add corresponding
   * table row id's to the output map.
   */
+  protected void findRowIdsForDocIds(String outputConnectionName, Set<Long> rowIDSet, List<String> paramValues, String componentHash)
+    throws ManifoldCFException
+  {
+    ArrayList list = new ArrayList(); // parameter list handed to buildConjunctionClause and then to performQuery
+    String query = buildConjunctionClause(list,new ClauseDescription[]{
+      new MultiClause(docKeyField,paramValues),
+      new UnitaryClause(outputConnNameField,outputConnectionName),
+      new UnitaryClause(componentHashField,componentHash)});
+    // Fetch the id column for rows satisfying the combined doc key / connection name / component hash clause
+    IResultSet set = performQuery("SELECT "+idField+" FROM "+
+      getTableName()+" WHERE "+query,list,null,null);
+    // Add every returned row id to the caller-supplied set
+    for (int i = 0; i < set.getRowCount(); i++)
+    {
+      IResultRow row = set.getRow(i);
+      Long rowID = (Long)row.getValue(idField);
+      rowIDSet.add(rowID);
+    }
+  }
+
+  /** Given values and parameters corresponding to a set of hash values, add corresponding
+  * table row id's to the output map.
+  */
   protected void findRowIdsForDocIds(String[] outputConnectionNames, Set<Long> rowIDSet, List<String> paramValues)
     throws ManifoldCFException
   {