You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/21 13:35:14 UTC

svn commit: r1633345 - in /manifoldcf/trunk/connectors: alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/ alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/ cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/

Author: kwright
Date: Tue Oct 21 11:35:13 2014
New Revision: 1633345

URL: http://svn.apache.org/r1633345
Log:
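CONNECTORS-1077 work for alfresco-webscript connector: record a "fetch document" activity (result code, description, and length) for each processed document, and check URL indexability before building the repository document. Also, in the alfresco and cmis connectors, replace the per-IOException interruption checks with a ManifoldCFException catch that suppresses activity recording when the error code is INTERRUPTED.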

Modified:
    manifoldcf/trunk/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java
    manifoldcf/trunk/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java
    manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java

Modified: manifoldcf/trunk/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java?rev=1633345&r1=1633344&r2=1633345&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java (original)
+++ manifoldcf/trunk/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java Tue Oct 21 11:35:13 2014
@@ -41,8 +41,10 @@ import java.util.*;
 
 
 public class AlfrescoConnector extends BaseRepositoryConnector {
+	
   private static final String ACTIVITY_FETCH = "fetch document";
   private static final String[] activitiesList = new String[]{ACTIVITY_FETCH};
+  
   private AlfrescoClient alfrescoClient;
 
   private static final String CONTENT_URL_PROPERTY = "contentUrlPath";
@@ -197,6 +199,12 @@ public class AlfrescoConnector extends B
 
     boolean enableDocumentProcessing = ConfigurationHandler.getEnableDocumentProcessing(spec);
     for (String doc : documentIdentifiers) {
+      
+      String errorCode = null;
+      String errorDesc = null;
+      Long fileLengthLong = null;
+      long startTime = System.currentTimeMillis();
+
       try {
 
         String nextVersion = statuses.getIndexedVersionString(doc);	
@@ -269,19 +277,40 @@ public class AlfrescoConnector extends B
 
         if (lSize != null && !activities.checkLengthIndexable(lSize.longValue())) {
           activities.noDocument(doc, documentVersion);
+          errorCode = activities.EXCLUDED_LENGTH;
+          errorDesc = "Excluding document because of length ("+lSize+")";
           continue;
         }
         
         if (!activities.checkMimeTypeIndexable(mimeType)) {
           activities.noDocument(doc, documentVersion);
+          errorCode = activities.EXCLUDED_MIMETYPE;
+          errorDesc = "Excluding document because of mime type ("+mimeType+")";
           continue;
         }
 
         if (!activities.checkDateIndexable(modifiedDate)) {
           activities.noDocument(doc, documentVersion);
+          errorCode = activities.EXCLUDED_DATE;
+          errorDesc = "Excluding document because of date ("+modifiedDate+")";
+          continue;
+        }
+        
+        String contentUrlPath = (String) properties.get(CONTENT_URL_PROPERTY);
+        if (contentUrlPath == null || contentUrlPath.isEmpty()) {
+          activities.noDocument(doc, documentVersion);
+          errorCode = "NOURL";
+          errorDesc = "Excluding document because no URL found";
           continue;
         }
         
+        if (!activities.checkURLIndexable(contentUrlPath)) {
+          activities.noDocument(doc, documentVersion);
+          errorCode = activities.EXCLUDED_URL;
+          errorDesc = "Excluding document because of URL ('"+contentUrlPath+"')";
+          continue;
+        }
+
         RepositoryDocument rd = new RepositoryDocument();
         rd.addField(FIELD_NODEREF, nodeRef);
         rd.addField(FIELD_TYPE, type);
@@ -313,17 +342,14 @@ public class AlfrescoConnector extends B
         InputStream stream;
         long length;
         byte[] empty = new byte[0];
-        String contentUrlPath = (String) properties.get(CONTENT_URL_PROPERTY);
-        if (contentUrlPath == null || contentUrlPath.isEmpty()) {
-          activities.noDocument(doc, documentVersion);
-          continue;
-        }
         
         if (enableDocumentProcessing) {
           if (lSize != null) {
             stream = alfrescoClient.fetchContent(contentUrlPath);
             if (stream == null) {
               activities.noDocument(doc, documentVersion);
+              errorCode = "NOSTREAM";
+              errorDesc = "Excluding document because no content stream found";
               continue;
             }
             length = lSize.longValue();
@@ -341,6 +367,8 @@ public class AlfrescoConnector extends B
           if (Logging.connectors != null && Logging.connectors.isDebugEnabled())
             Logging.connectors.debug(MessageFormat.format("Ingesting with id: {0}, URI {1} and rd {2}", new Object[]{uuid, nodeRef, rd.getFileName()}));
           activities.ingestDocumentWithException(doc, documentVersion, contentUrlPath, rd);
+          errorCode = "OK";
+          fileLengthLong = new Long(length);
         } catch (IOException e) {
           handleIOException(e,"reading stream");
         } finally {
@@ -353,6 +381,14 @@ public class AlfrescoConnector extends B
 
       } catch (AlfrescoDownException e) {
         handleAlfrescoDownException(e,"processing");
+      } catch (ManifoldCFException e) {
+        if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+          errorCode = null;
+        throw e;
+      } finally {
+        if (errorCode != null)
+          activities.recordActivity(new Long(startTime), ACTIVITY_FETCH,
+            fileLengthLong, doc, errorCode, errorDesc, null);
       }
     }
   }

Modified: manifoldcf/trunk/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java?rev=1633345&r1=1633344&r2=1633345&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java (original)
+++ manifoldcf/trunk/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java Tue Oct 21 11:35:13 2014
@@ -913,12 +913,8 @@ public class AlfrescoRepositoryConnector
             } 
 
           }catch(IOException e){
-            if (e instanceof java.io.InterruptedIOException && !(e instanceof java.net.SocketTimeoutException))
-              errorCode = null;
-            else {
-              errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
-              errorDesc = e.getMessage();
-            }
+            errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+            errorDesc = e.getMessage();
             Logging.connectors.warn(
                 "Alfresco: IOException finding children: "
                     + e.getMessage(), e);
@@ -971,12 +967,8 @@ public class AlfrescoRepositoryConnector
                       + e.getMessage(), e);
               handleParseException(e);
             } catch (IOException e) {
-              if (e instanceof java.io.InterruptedIOException && !(e instanceof java.net.SocketTimeoutException))
-                errorCode = null;
-              else {
-                errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
-                errorDesc = e.getMessage();
-              }
+              errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+              errorDesc = e.getMessage();
               Logging.connectors.warn(
                   "Alfresco: IOException: "
                       + e.getMessage(), e);
@@ -1002,6 +994,10 @@ public class AlfrescoRepositoryConnector
             }
 
           }
+        } catch (ManifoldCFException e) {
+          if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+            errorCode = null;
+          throw e;
         } finally {
           if (errorCode != null)
             activities.recordActivity(new Long(startTime), ACTIVITY_READ,

Modified: manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java?rev=1633345&r1=1633344&r2=1633345&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java (original)
+++ manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java Tue Oct 21 11:35:13 2014
@@ -1326,10 +1326,7 @@ public class CmisRepositoryConnector ext
                 fileLengthLong = new Long(fileLength);
                 errorCode = "OK";
               } catch (IOException e) {
-                if (e instanceof InterruptedIOException && !(e instanceof java.net.SocketTimeoutException))
-                  errorCode = null;
-                else
-                  errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+                errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
                 errorDesc = e.getMessage();
                 handleIOException(e, "reading file input stream");
               }
@@ -1339,10 +1336,7 @@ public class CmisRepositoryConnector ext
                   is.close();
                 }
               } catch (IOException e) {
-                if (e instanceof InterruptedIOException && !(e instanceof java.net.SocketTimeoutException))
-                  errorCode = null;
-                else
-                  errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+                errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
                 errorDesc = e.getMessage();
                 handleIOException(e, "closing file input stream");
               }
@@ -1353,6 +1347,10 @@ public class CmisRepositoryConnector ext
             errorCode = "UNKNOWNTYPE";
             errorDesc = "Document type is unrecognized: '"+baseTypeId+"'";
           }
+        } catch (ManifoldCFException e) {
+          if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+            errorCode = null;
+          throw e;
         } finally {
           if (errorCode != null)
             activities.recordActivity(new Long(startTime), ACTIVITY_READ,