You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2020/12/11 18:03:34 UTC

svn commit: r1884332 - in /manifoldcf/trunk: CHANGES.txt connectors/solr/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/solr/SolrIngesterConnector.java

Author: kwright
Date: Fri Dec 11 18:03:33 2020
New Revision: 1884332

URL: http://svn.apache.org/viewvc?rev=1884332&view=rev
Log:
Apply patch for Solr ingester connector

Modified:
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/solr/SolrIngesterConnector.java

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1884332&r1=1884331&r2=1884332&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Fri Dec 11 18:03:33 2020
@@ -9,7 +9,7 @@ CONNECTORS-1660: Better handling of non-
 NOTICKET: Add missing Jetty JSP jar so crawler UI works in the examples.
 (Karl Wright)
 
-CONNECTORS-1653: Add contributed solr repository connector.
+CONNECTORS-1653: Add contributed solr repository connector, including patches.
 (Olivier Tavard)
 
 CONNECTORS-1655: Handle some forms of illegal content type.

Modified: manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/solr/SolrIngesterConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/solr/SolrIngesterConnector.java?rev=1884332&r1=1884331&r2=1884332&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/solr/SolrIngesterConnector.java (original)
+++ manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/solr/SolrIngesterConnector.java Fri Dec 11 18:03:33 2020
@@ -35,6 +35,7 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
+import java.util.TimeZone;
 import java.util.concurrent.TimeUnit;
 
 import javax.net.ssl.SSLContext;
@@ -101,11 +102,6 @@ public class SolrIngesterConnector exten
 
   public static final String _rcsid = "@(#)$Id: solringesterConnector.java 994959 2010-09-08 10:04:42Z redguy $";
 
-  /**
-   * Deny access token for default authority
-   */
-  private final static String defaultAuthorityDenyToken = "__nosecurity__";
-
   private final static String ACTION_PARAM_NAME = "action";
 
   private final static String ACTION_CHECK = "check";
@@ -178,7 +174,7 @@ public class SolrIngesterConnector exten
 
   @Override
   public int getMaxDocumentRequest() {
-    return 100;
+    return 1;
   }
 
   @Override
@@ -358,7 +354,7 @@ public class SolrIngesterConnector exten
 
     String idFieldName = null;
     String collection = null;
-    final String dateField = null;
+    String dateField = null;
     final String contentField = null;
     String rowsNumberString = null;
     String filter = null;
@@ -380,6 +376,9 @@ public class SolrIngesterConnector exten
       if (sn.getType() == SolrIngesterConfig.ROWS_NUMBER) {
         rowsNumberString = sn.getAttributeValue(SolrIngesterConfig.ATTRIBUTE_VALUE);
       }
+      if (sn.getType() == SolrIngesterConfig.DATE_FIELD) {
+        dateField = sn.getAttributeValue(SolrIngesterConfig.ATTRIBUTE_VALUE);
+      }
 
       if (sn.getType() == SolrIngesterConfig.FILTER_CONDITION) {
         filter = sn.getAttributeValue(SolrIngesterConfig.ATTRIBUTE_VALUE);
@@ -392,10 +391,9 @@ public class SolrIngesterConnector exten
       // Unpack seed time from seed version string
       startTime = new Long(lastSeedVersion).longValue();
     }
-
     getSession();
-
-    final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT);
+    final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
+    sdf.setTimeZone(TimeZone.getDefault());
 
     final StringBuilder url = new StringBuilder(solringesterEntryPoint);
     url.append("?").append(ACTION_PARAM_NAME).append("=").append(ACTION_SEED);
@@ -407,13 +405,19 @@ public class SolrIngesterConnector exten
     // ExecuteSeedingThread t = new ExecuteSeedingThread(client, url.toString());
 
     long dateSolr;
-    if (lastSeedVersion != null) {
+    String filterDate = null;
+
+    
+    if (lastSeedVersion != null && !lastSeedVersion.isEmpty() && !lastSeedVersion.contentEquals("0")) {
       dateSolr = new Long(lastSeedVersion).longValue();
-    } else {
+      String dateSolrString = sdf.format(dateSolr);
+      filterDate = dateField+":["+dateSolrString+ " TO NOW]";
+    }
+    else {
       dateSolr = 0L;
     }
 
-    // String dateSolr = sdf.format(dateSolr);
+  
 
     final int rowsNumber = Integer.valueOf(rowsNumberString);
     try {
@@ -423,16 +427,19 @@ public class SolrIngesterConnector exten
       } else {
         query = new SolrQuery("*:*").addFilterQuery(filter).setRows(rowsNumber).setSort(idFieldName, SolrQuery.ORDER.asc);
       }
+      if (filterDate != null && !filterDate.isEmpty() && !filterDate.contentEquals("0")) {
+        query.addFilterQuery(filterDate);
+      }
       query.setFields(idFieldName);
       String cursorMark = CursorMarkParams.CURSOR_MARK_START;
       boolean done = false;
       while (!done) {
         query.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
         QueryResponse response;
-
         response = httpSolrClient.query(collection, query);
         final String nextCursorMark = response.getNextCursorMark();
         final SolrDocumentList documents = response.getResults();
+        
         for (final SolrDocument document : documents) {
           activities.addSeedDocument((String) document.getFieldValue(idFieldName));
         }
@@ -453,8 +460,8 @@ public class SolrIngesterConnector exten
   @Override
   public void processDocuments(final String[] documentIdentifiers, final IExistingVersions statuses, final Specification spec, final IProcessActivity activities, final int jobMode,
       final boolean usesDefaultAuthority) throws ManifoldCFException, ServiceInterruption {
-
-    getSession();
+    
+        getSession();
 
     if (Logging.connectors.isDebugEnabled()) {
       Logging.connectors.debug("SolrIngester: ProcessDocuments method");
@@ -656,10 +663,8 @@ public class SolrIngesterConnector exten
               is = new ByteArrayInputStream(contentFieldValuesString.getBytes());
 
               // security part
-
-              if (securityActivated = true) {
+              if (securityActivated == true) {
                 
-
                 if (Logging.connectors.isDebugEnabled()) {
                   Logging.connectors.debug("Security part");
                 }
@@ -685,25 +690,23 @@ public class SolrIngesterConnector exten
                   all.clear();
                   all.addAll(hs);
                   securityValues = all.toArray(new String[0]);
-                  doc.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, securityValues, new String[] { defaultAuthorityDenyToken });
+                  doc.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, securityValues, new String[] { GLOBAL_DENY_TOKEN });
                   securityFieldValues = null;
                   securityFieldValues2 = null;
                 } else if (document.getFieldValues(securityField) != null) {
                   ArrayList<Object> securityFieldValues = null;
                   securityFieldValues = (ArrayList<Object>) document.getFieldValues(securityField);
                   String[] tabsecurityFieldValues = securityFieldValues.toArray(new String[0]);
-                  doc.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, tabsecurityFieldValues, new String[] { defaultAuthorityDenyToken });
+                  doc.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, tabsecurityFieldValues, new String[] { GLOBAL_DENY_TOKEN });
                   securityFieldValues = null;
                   tabsecurityFieldValues = null;
                 } else if (document.getFieldValues(securityField2) != null) {
                   ArrayList<Object> securityFieldValues2 = null;
                   securityFieldValues2 = (ArrayList<Object>) document.getFieldValues(securityField2);
                   String[] tabsecurityFieldValues2 = securityFieldValues2.toArray(new String[0]);
-                  doc.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, tabsecurityFieldValues2, new String[] { defaultAuthorityDenyToken });
+                  doc.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, tabsecurityFieldValues2, new String[] { GLOBAL_DENY_TOKEN });
                   securityFieldValues2 = null;
                   tabsecurityFieldValues2 = null;
-                } else {
-                  doc.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, new String[] { "__nosecurity__" }, new String[] { "__nosecurity__" });
                 }
 
               }