You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/30 08:57:55 UTC

svn commit: r1635422 - in /manifoldcf/branches/dev_1x: ./ connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java

Author: kwright
Date: Thu Oct 30 07:57:55 2014
New Revision: 1635422

URL: http://svn.apache.org/r1635422
Log:
Pull up the fix for CONNECTORS-1077 (Dropbox connector) from trunk r1635421.

Modified:
    manifoldcf/branches/dev_1x/   (props changed)
    manifoldcf/branches/dev_1x/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java

Propchange: manifoldcf/branches/dev_1x/
------------------------------------------------------------------------------
  Merged /manifoldcf/trunk:r1635421

Modified: manifoldcf/branches/dev_1x/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java?rev=1635422&r1=1635421&r2=1635422&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java (original)
+++ manifoldcf/branches/dev_1x/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java Thu Oct 30 07:57:55 2014
@@ -34,6 +34,7 @@ import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.Iterator;
+import java.util.Date;
 import org.apache.manifoldcf.crawler.system.Logging;
 import org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector;
 import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
@@ -889,8 +890,8 @@ public class DropboxRepositoryConnector 
       
       String versionString;
       GetObjectThread objt = new GetObjectThread(documentIdentifier);
+      objt.start();
       try {
-        objt.start();
         objt.finishUp();
       } catch (InterruptedException e) {
         objt.interrupt();
@@ -903,149 +904,190 @@ public class DropboxRepositoryConnector 
 
       DropboxAPI.Entry dropboxObject = objt.getResponse();
 
-      if (!dropboxObject.isDir) {
-        if (dropboxObject.isDeleted) {
-          activities.deleteDocument(documentIdentifier);
-          continue;
-        } else if (StringUtils.isNotEmpty(dropboxObject.rev)) {
-          StringBuilder sb = new StringBuilder();
-
-          // Acls
-          packList(sb,acls,'+');
-          if (acls.length > 0) {
-            sb.append('+');
-            pack(sb,defaultAuthorityDenyToken,'+');
-          }
-          else
-            sb.append('-');
-
-          sb.append(dropboxObject.rev);
-          versionString = sb.toString();
-        } else {
-          //a document that doesn't contain versioning information will never be processed
-          activities.deleteDocument(documentIdentifier);
-          continue;
-        }
-      } else {
+      if (dropboxObject.isDir) {
         //a folder will always be processed
         versionString = StringUtils.EMPTY;
-      }
-    
-      if (versionString.length() == 0 || activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
-      {
-        long startTime = System.currentTimeMillis();
-        String errorCode = "FAILED";
-        String errorDesc = StringUtils.EMPTY;
-        Long fileSize = null;
-        boolean doLog = false;
-        String nodeId = documentIdentifier;
-        String version = versionString;
         
-        try {
-          if (dropboxObject.isDir) {
+        // adding all the children + subdirs for a folder
 
-            // adding all the children + subdirs for a folder
+        List<DropboxAPI.Entry> children = dropboxObject.contents;
+        for (DropboxAPI.Entry child : children) {
+          activities.addDocumentReference(child.path, documentIdentifier, RELATIONSHIP_CHILD);
+        }
 
-            List<DropboxAPI.Entry> children = dropboxObject.contents;
-            for (DropboxAPI.Entry child : children) {
-              activities.addDocumentReference(child.path, nodeId, RELATIONSHIP_CHILD);
-            }
+        activities.noDocument(documentIdentifier,versionString);
+        continue;
+      }
+      
+      if (dropboxObject.isDeleted) {
+        activities.deleteDocument(documentIdentifier);
+        continue;
+      }
 
-          } else {
-            // its a file
-            doLog = true;
-              
-            // content ingestion
-            RepositoryDocument rd = new RepositoryDocument();
+      if (StringUtils.isEmpty(dropboxObject.rev)) {
+        //a document that doesn't contain versioning information will never be processed
+        activities.deleteDocument(documentIdentifier);
+        continue;
+      }
+      
+      StringBuilder sb = new StringBuilder();
 
-            if (acls.length > 0) {
-              rd.setSecurityACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT,acls);
-              String[] denyAclArray = new String[]{defaultAuthorityDenyToken};
-              rd.setSecurityDenyACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT,denyAclArray);
-            }
+      // Acls
+      packList(sb,acls,'+');
+      if (acls.length > 0) {
+        sb.append('+');
+        pack(sb,defaultAuthorityDenyToken,'+');
+      }
+      else
+        sb.append('-');
+
+      sb.append(dropboxObject.rev);
+      versionString = sb.toString();
+    
+      if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
+        continue;
+      
+      long startTime = System.currentTimeMillis();
+      String errorCode = null;
+      String errorDesc = null;
+      Long fileSize = null;
+      String nodeId = documentIdentifier;
+      String version = versionString;
+        
+      try {
+        // Length in bytes
+        long fileLength = dropboxObject.bytes;
+        if (!activities.checkLengthIndexable(fileLength))
+        {
+          errorCode = activities.EXCLUDED_LENGTH;
+          errorDesc = "Document excluded because of length ("+fileLength+")";
+          activities.noDocument(documentIdentifier,versionString);
+          continue;
+        }
+        
+        //documentURI
+        String documentURI = dropboxObject.path;
+        if (!activities.checkURLIndexable(documentURI))
+        {
+          errorCode = activities.EXCLUDED_URL;
+          errorDesc = "Document excluded because of URL ('"+documentURI+"')";
+          activities.noDocument(documentIdentifier,versionString);
+          continue;
+        }
+
+        //Modified date
+        Date modifiedDate;
+        if (dropboxObject.modified != null)
+          modifiedDate = com.dropbox.client2.RESTUtility.parseDate(dropboxObject.modified);
+        else
+          modifiedDate = null;
+        if (!activities.checkDateIndexable(modifiedDate))
+        {
+          errorCode = activities.EXCLUDED_DATE;
+          errorDesc = "Document excluded because of date ("+modifiedDate+")";
+          activities.noDocument(documentIdentifier,versionString);
+          continue;
+        }
+        
+        // Mime type
+        String mimeType = dropboxObject.mimeType;
+        if (!activities.checkMimeTypeIndexable(mimeType))
+        {
+          errorCode = activities.EXCLUDED_MIMETYPE;
+          errorDesc = "Document excluded because of mime type ('"+mimeType+"')";
+          activities.noDocument(documentIdentifier,versionString);
+          continue;
+        }
+        
+        // content ingestion
+        RepositoryDocument rd = new RepositoryDocument();
+
+        if (acls.length > 0) {
+          rd.setSecurityACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT,acls);
+          String[] denyAclArray = new String[]{defaultAuthorityDenyToken};
+          rd.setSecurityDenyACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT,denyAclArray);
+        }
             
-            // Length in bytes
-            long fileLength = dropboxObject.bytes;
-            //documentURI
-            String documentURI = dropboxObject.path;
-
-            if (dropboxObject.path != null)
-              rd.setFileName(dropboxObject.path);
-            if (dropboxObject.mimeType != null)
-              rd.setMimeType(dropboxObject.mimeType);
-            if (dropboxObject.modified != null)
-              rd.setModifiedDate(com.dropbox.client2.RESTUtility.parseDate(dropboxObject.modified));
-            // There doesn't appear to be a created date...
+
+        if (dropboxObject.path != null)
+          rd.setFileName(dropboxObject.path);
+        if (dropboxObject.mimeType != null)
+          rd.setMimeType(dropboxObject.mimeType);
+        if (dropboxObject.modified != null)
+          rd.setModifiedDate(modifiedDate);
+        // There doesn't appear to be a created date...
                 
-            rd.addField("Modified", dropboxObject.modified);
-            rd.addField("Size", dropboxObject.size);
-            rd.addField("Path", dropboxObject.path);
-            rd.addField("Root", dropboxObject.root);
-            rd.addField("ClientMtime", dropboxObject.clientMtime);
-            rd.addField("mimeType", dropboxObject.mimeType);
-            rd.addField("rev", dropboxObject.rev);
+        rd.addField("Modified", dropboxObject.modified);
+        rd.addField("Size", dropboxObject.size);
+        rd.addField("Path", dropboxObject.path);
+        rd.addField("Root", dropboxObject.root);
+        rd.addField("ClientMtime", dropboxObject.clientMtime);
+        rd.addField("mimeType", dropboxObject.mimeType);
+        rd.addField("rev", dropboxObject.rev);
               
-            getSession();
-            BackgroundStreamThread t = new BackgroundStreamThread(nodeId);
+        getSession();
+        BackgroundStreamThread t = new BackgroundStreamThread(nodeId);
+        t.start();
+        try {
+          boolean wasInterrupted = false;
+          try {
+            InputStream is = t.getSafeInputStream();
             try {
-              t.start();
-              boolean wasInterrupted = false;
-              try {
-                InputStream is = t.getSafeInputStream();
-                try {
-                  rd.setBinary(is, fileLength);
-                  activities.ingestDocumentWithException(nodeId, version, documentURI, rd);
-                } finally {
-                  is.close();
-                }
-              } catch (java.net.SocketTimeoutException e) {
-                throw e;
-              } catch (InterruptedIOException e) {
-                wasInterrupted = true;
-                throw e;
-              } catch (ManifoldCFException e) {
-                if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
-                  wasInterrupted = true;
-                throw e;
-              } finally {
-                if (!wasInterrupted)
-                  // This does a join
-                  t.finishUp();
-              }
-
+              rd.setBinary(is, fileLength);
+              activities.ingestDocumentWithException(nodeId, version, documentURI, rd);
               // No errors.  Record the fact that we made it.
               errorCode = "OK";
               fileSize = new Long(fileLength);
-            } catch (InterruptedException e) {
-              // We were interrupted out of the join, most likely.  Before we abandon the thread,
-              // send a courtesy interrupt.
-              t.interrupt();
-              throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
-                ManifoldCFException.INTERRUPTED);
-            } catch (java.net.SocketTimeoutException e) {
-              errorCode = "IO ERROR";
-              errorDesc = e.getMessage();
-              handleIOException(e);
-            } catch (InterruptedIOException e) {
-              t.interrupt();
-              throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
-                ManifoldCFException.INTERRUPTED);
-            } catch (IOException e) {
-              errorCode = "IO ERROR";
-              errorDesc = e.getMessage();
-              handleIOException(e);
-            } catch (DropboxException e) {
-              Logging.connectors.warn("DROPBOX: Error getting stream: " + e.getMessage(), e);
-              errorCode = "DROPBOX ERROR";
-              errorDesc = e.getMessage();
-              handleDropboxException(e);
+            } finally {
+              is.close();
             }
+          } catch (java.net.SocketTimeoutException e) {
+            throw e;
+          } catch (InterruptedIOException e) {
+            wasInterrupted = true;
+            throw e;
+          } catch (ManifoldCFException e) {
+            if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+              wasInterrupted = true;
+            throw e;
+          } finally {
+            if (!wasInterrupted)
+              // This does a join
+              t.finishUp();
           }
-        } finally {
-          if (doLog)
-            activities.recordActivity(new Long(startTime), ACTIVITY_READ,
-              fileSize, nodeId, errorCode, errorDesc, null);
+
+        } catch (InterruptedException e) {
+          // We were interrupted out of the join, most likely.  Before we abandon the thread,
+          // send a courtesy interrupt.
+          t.interrupt();
+          throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+            ManifoldCFException.INTERRUPTED);
+        } catch (java.net.SocketTimeoutException e) {
+          errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+          errorDesc = e.getMessage();
+          handleIOException(e);
+        } catch (InterruptedIOException e) {
+          t.interrupt();
+          throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+            ManifoldCFException.INTERRUPTED);
+        } catch (IOException e) {
+          errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+          errorDesc = e.getMessage();
+          handleIOException(e);
+        } catch (DropboxException e) {
+          Logging.connectors.warn("DROPBOX: Error getting stream: " + e.getMessage(), e);
+          errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+          errorDesc = e.getMessage();
+          handleDropboxException(e);
         }
+      } catch (ManifoldCFException e) {
+        if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+          errorCode = null;
+        throw e;
+      } finally {
+        if (errorCode != null)
+          activities.recordActivity(new Long(startTime), ACTIVITY_READ,
+            fileSize, nodeId, errorCode, errorDesc, null);
       }
     }
   }