You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/07/21 16:14:53 UTC
svn commit: r1612276 -
/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
Author: kwright
Date: Mon Jul 21 14:14:53 2014
New Revision: 1612276
URL: http://svn.apache.org/r1612276
Log:
removeDocument wasn't quite right before; worked more like deleteDocument
Modified:
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java?rev=1612276&r1=1612275&r2=1612276&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java Mon Jul 21 14:14:53 2014
@@ -42,6 +42,7 @@ import java.io.*;
* <tr><td>id</td><td>BIGINT</td><td>Primary Key</td></tr>
* <tr><td>connectionname</td><td>VARCHAR(32)</td><td>Reference:outputconnections.connectionname</td></tr>
* <tr><td>dockey</td><td>VARCHAR(73)</td><td></td></tr>
+* <tr><td>componenthash</td><td>VARCHAR(40)</td><td></td></tr>
* <tr><td>docuri</td><td>LONGTEXT</td><td></td></tr>
* <tr><td>urihash</td><td>VARCHAR(40)</td><td></td></tr>
* <tr><td>lastversion</td><td>LONGTEXT</td><td></td></tr>
@@ -1351,12 +1352,12 @@ public class IncrementalIngester extends
iter = docIdValues.iterator();
j = 0;
List<String> list2 = new ArrayList<String>();
- maxClauses = maxClausesRowIdsForDocIds(outputConnectionName);
+ maxClauses = maxClausesRowIdsForDocIds(outputConnectionName,componentHash);
while (iter.hasNext())
{
if (j == maxClauses)
{
- findRowIdsForDocIds(outputConnectionName,rowIDSet,list2);
+ findRowIdsForDocIds(outputConnectionName,rowIDSet,list2,componentHash);
list2.clear();
j = 0;
}
@@ -1365,7 +1366,7 @@ public class IncrementalIngester extends
}
if (j > 0)
- findRowIdsForDocIds(outputConnectionName,rowIDSet,list2);
+ findRowIdsForDocIds(outputConnectionName,rowIDSet,list2,componentHash);
// Next, go through the list of row IDs, and delete them in chunks
j = 0;
@@ -1455,6 +1456,15 @@ public class IncrementalIngester extends
new UnitaryClause(outputConnNameField,outputConnectionName)});
}
+ /** Calculate the maximum number of doc ids we should use per query when the query is also restricted by output connection name and component hash.
+ */
+ protected int maxClausesRowIdsForDocIds(String outputConnectionName, String componentHash)
+ {
+ return findConjunctionClauseMax(new ClauseDescription[]{ // only the two fixed clauses are passed; the doc-key clause is not part of this calculation
+ new UnitaryClause(outputConnNameField,outputConnectionName),
+ new UnitaryClause(componentHashField,componentHash)});
+ }
+
/** Calculate the maximum number of doc ids we should use.
*/
protected int maxClausesRowIdsForDocIds(String[] outputConnectionNames)
@@ -1488,6 +1498,29 @@ public class IncrementalIngester extends
/** Given values and parameters corresponding to a set of hash values, add corresponding
* table row id's to the output map.
*/
+ protected void findRowIdsForDocIds(String outputConnectionName, Set<Long> rowIDSet, List<String> paramValues, String componentHash)
+ throws ManifoldCFException
+ {
+ ArrayList list = new ArrayList(); // passed to buildConjunctionClause, then handed to performQuery together with the query text
+ String query = buildConjunctionClause(list,new ClauseDescription[]{
+ new MultiClause(docKeyField,paramValues), // the batch of doc key values supplied by the caller
+ new UnitaryClause(outputConnNameField,outputConnectionName),
+ new UnitaryClause(componentHashField,componentHash)}); // NOTE(review): confirm UnitaryClause behaves as intended if componentHash can be null
+
+ IResultSet set = performQuery("SELECT "+idField+" FROM "+
+ getTableName()+" WHERE "+query,list,null,null); // only the id column is selected
+
+ for (int i = 0; i < set.getRowCount(); i++)
+ {
+ IResultRow row = set.getRow(i);
+ Long rowID = (Long)row.getValue(idField);
+ rowIDSet.add(rowID); // results are accumulated into the caller's set; nothing is returned
+ }
+ }
+
+ /** Given values and parameters corresponding to a set of hash values, add corresponding
+ * table row id's to the output map.
+ */
protected void findRowIdsForDocIds(String[] outputConnectionNames, Set<Long> rowIDSet, List<String> paramValues)
throws ManifoldCFException
{