You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2020/12/11 18:03:34 UTC
svn commit: r1884332 - in /manifoldcf/trunk: CHANGES.txt
connectors/solr/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/solr/SolrIngesterConnector.java
Author: kwright
Date: Fri Dec 11 18:03:33 2020
New Revision: 1884332
URL: http://svn.apache.org/viewvc?rev=1884332&view=rev
Log:
Apply patch for the Solr ingester connector
Modified:
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/solr/SolrIngesterConnector.java
Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1884332&r1=1884331&r2=1884332&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Fri Dec 11 18:03:33 2020
@@ -9,7 +9,7 @@ CONNECTORS-1660: Better handling of non-
NOTICKET: Add missing Jetty JSP jar so crawler UI works in the examples.
(Karl Wright)
-CONNECTORS-1653: Add contributed solr repository connector.
+CONNECTORS-1653: Add contributed solr repository connector, including patches.
(Olivier Tavard)
CONNECTORS-1655: Handle some forms of illegal content type.
Modified: manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/solr/SolrIngesterConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/solr/SolrIngesterConnector.java?rev=1884332&r1=1884331&r2=1884332&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/solr/SolrIngesterConnector.java (original)
+++ manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/solr/SolrIngesterConnector.java Fri Dec 11 18:03:33 2020
@@ -35,6 +35,7 @@ import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
+import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
import javax.net.ssl.SSLContext;
@@ -101,11 +102,6 @@ public class SolrIngesterConnector exten
public static final String _rcsid = "@(#)$Id: solringesterConnector.java 994959 2010-09-08 10:04:42Z redguy $";
- /**
- * Deny access token for default authority
- */
- private final static String defaultAuthorityDenyToken = "__nosecurity__";
-
private final static String ACTION_PARAM_NAME = "action";
private final static String ACTION_CHECK = "check";
@@ -178,7 +174,7 @@ public class SolrIngesterConnector exten
@Override
public int getMaxDocumentRequest() {
- return 100;
+ return 1;
}
@Override
@@ -358,7 +354,7 @@ public class SolrIngesterConnector exten
String idFieldName = null;
String collection = null;
- final String dateField = null;
+ String dateField = null;
final String contentField = null;
String rowsNumberString = null;
String filter = null;
@@ -380,6 +376,9 @@ public class SolrIngesterConnector exten
if (sn.getType() == SolrIngesterConfig.ROWS_NUMBER) {
rowsNumberString = sn.getAttributeValue(SolrIngesterConfig.ATTRIBUTE_VALUE);
}
+ if (sn.getType() == SolrIngesterConfig.DATE_FIELD) {
+ dateField = sn.getAttributeValue(SolrIngesterConfig.ATTRIBUTE_VALUE);
+ }
if (sn.getType() == SolrIngesterConfig.FILTER_CONDITION) {
filter = sn.getAttributeValue(SolrIngesterConfig.ATTRIBUTE_VALUE);
@@ -392,10 +391,9 @@ public class SolrIngesterConnector exten
// Unpack seed time from seed version string
startTime = new Long(lastSeedVersion).longValue();
}
-
getSession();
-
- final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT);
+ final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
+ sdf.setTimeZone(TimeZone.getDefault());
final StringBuilder url = new StringBuilder(solringesterEntryPoint);
url.append("?").append(ACTION_PARAM_NAME).append("=").append(ACTION_SEED);
@@ -407,13 +405,19 @@ public class SolrIngesterConnector exten
// ExecuteSeedingThread t = new ExecuteSeedingThread(client, url.toString());
long dateSolr;
- if (lastSeedVersion != null) {
+ String filterDate = null;
+
+
+ if (lastSeedVersion != null && !lastSeedVersion.isEmpty() && !lastSeedVersion.contentEquals("0")) {
dateSolr = new Long(lastSeedVersion).longValue();
- } else {
+ String dateSolrString = sdf.format(dateSolr);
+ filterDate = dateField+":["+dateSolrString+ " TO NOW]";
+ }
+ else {
dateSolr = 0L;
}
- // String dateSolr = sdf.format(dateSolr);
+
final int rowsNumber = Integer.valueOf(rowsNumberString);
try {
@@ -423,16 +427,19 @@ public class SolrIngesterConnector exten
} else {
query = new SolrQuery("*:*").addFilterQuery(filter).setRows(rowsNumber).setSort(idFieldName, SolrQuery.ORDER.asc);
}
+ if (filterDate != null && !filterDate.isEmpty() && !filterDate.contentEquals("0")) {
+ query.addFilterQuery(filterDate);
+ }
query.setFields(idFieldName);
String cursorMark = CursorMarkParams.CURSOR_MARK_START;
boolean done = false;
while (!done) {
query.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
QueryResponse response;
-
response = httpSolrClient.query(collection, query);
final String nextCursorMark = response.getNextCursorMark();
final SolrDocumentList documents = response.getResults();
+
for (final SolrDocument document : documents) {
activities.addSeedDocument((String) document.getFieldValue(idFieldName));
}
@@ -453,8 +460,8 @@ public class SolrIngesterConnector exten
@Override
public void processDocuments(final String[] documentIdentifiers, final IExistingVersions statuses, final Specification spec, final IProcessActivity activities, final int jobMode,
final boolean usesDefaultAuthority) throws ManifoldCFException, ServiceInterruption {
-
- getSession();
+
+ getSession();
if (Logging.connectors.isDebugEnabled()) {
Logging.connectors.debug("SolrIngester: ProcessDocuments method");
@@ -656,10 +663,8 @@ public class SolrIngesterConnector exten
is = new ByteArrayInputStream(contentFieldValuesString.getBytes());
// security part
-
- if (securityActivated = true) {
+ if (securityActivated == true) {
-
if (Logging.connectors.isDebugEnabled()) {
Logging.connectors.debug("Security part");
}
@@ -685,25 +690,23 @@ public class SolrIngesterConnector exten
all.clear();
all.addAll(hs);
securityValues = all.toArray(new String[0]);
- doc.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, securityValues, new String[] { defaultAuthorityDenyToken });
+ doc.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, securityValues, new String[] { GLOBAL_DENY_TOKEN });
securityFieldValues = null;
securityFieldValues2 = null;
} else if (document.getFieldValues(securityField) != null) {
ArrayList<Object> securityFieldValues = null;
securityFieldValues = (ArrayList<Object>) document.getFieldValues(securityField);
String[] tabsecurityFieldValues = securityFieldValues.toArray(new String[0]);
- doc.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, tabsecurityFieldValues, new String[] { defaultAuthorityDenyToken });
+ doc.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, tabsecurityFieldValues, new String[] { GLOBAL_DENY_TOKEN });
securityFieldValues = null;
tabsecurityFieldValues = null;
} else if (document.getFieldValues(securityField2) != null) {
ArrayList<Object> securityFieldValues2 = null;
securityFieldValues2 = (ArrayList<Object>) document.getFieldValues(securityField2);
String[] tabsecurityFieldValues2 = securityFieldValues2.toArray(new String[0]);
- doc.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, tabsecurityFieldValues2, new String[] { defaultAuthorityDenyToken });
+ doc.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, tabsecurityFieldValues2, new String[] { GLOBAL_DENY_TOKEN });
securityFieldValues2 = null;
tabsecurityFieldValues2 = null;
- } else {
- doc.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, new String[] { "__nosecurity__" }, new String[] { "__nosecurity__" });
}
}