You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/07/24 11:07:46 UTC

svn commit: r1506452 - in /manifoldcf/branches/release-1.3-branch: ./ CHANGES.txt connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java

Author: kwright
Date: Wed Jul 24 09:07:45 2013
New Revision: 1506452

URL: http://svn.apache.org/r1506452
Log:
Pull up the fix for CONNECTORS-757 from trunk (r1505797) in preparation for RC2.

Modified:
    manifoldcf/branches/release-1.3-branch/   (props changed)
    manifoldcf/branches/release-1.3-branch/CHANGES.txt
    manifoldcf/branches/release-1.3-branch/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java

Propchange: manifoldcf/branches/release-1.3-branch/
------------------------------------------------------------------------------
  Merged /manifoldcf/trunk:r1505797

Modified: manifoldcf/branches/release-1.3-branch/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-1.3-branch/CHANGES.txt?rev=1506452&r1=1506451&r2=1506452&view=diff
==============================================================================
--- manifoldcf/branches/release-1.3-branch/CHANGES.txt (original)
+++ manifoldcf/branches/release-1.3-branch/CHANGES.txt Wed Jul 24 09:07:45 2013
@@ -1,6 +1,10 @@
 ManifoldCF Change Log
 $Id$
 
+CONNECTORS-757: NPE's from GoogleDrive connector when crawling
+documents that don't have a file length.
+(Piergiorgio Lucidi, Karl Wright)
+
 ======================= Release 1.3 =====================
 
 CONNECTORS-756: Fix broken JDBC authority UI.

Modified: manifoldcf/branches/release-1.3-branch/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-1.3-branch/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java?rev=1506452&r1=1506451&r2=1506452&view=diff
==============================================================================
--- manifoldcf/branches/release-1.3-branch/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java (original)
+++ manifoldcf/branches/release-1.3-branch/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java Wed Jul 24 09:07:45 2013
@@ -1061,102 +1061,107 @@ public class GoogleDriveRepositoryConnec
             String documentURI = getUrl(googleFile, "application/pdf");
 
             // Get the file length
-            long fileLength = googleFile.getFileSize();
+            Long fileLength = googleFile.getFileSize();
+            if (fileLength != null) {
 
-            // Unpack the version string
-            ArrayList acls = new ArrayList();
-            StringBuilder denyAclBuffer = new StringBuilder();
-            int index = unpackList(acls,version,0,'+');
-            if (index < version.length() && version.charAt(index++) == '+') {
-              index = unpack(denyAclBuffer,version,index,'+');
-            }
+              // Unpack the version string
+              ArrayList acls = new ArrayList();
+              StringBuilder denyAclBuffer = new StringBuilder();
+              int index = unpackList(acls,version,0,'+');
+              if (index < version.length() && version.charAt(index++) == '+') {
+                index = unpack(denyAclBuffer,version,index,'+');
+              }
 
-            //otherwise process
-            RepositoryDocument rd = new RepositoryDocument();
+              //otherwise process
+              RepositoryDocument rd = new RepositoryDocument();
 
-            // Turn into acls and add into description
-            String[] aclArray = new String[acls.size()];
-            for (int j = 0; j < aclArray.length; j++) {
-              aclArray[j] = (String)acls.get(j);
-            }
-            rd.setACL(aclArray);
-            if (denyAclBuffer.length() > 0) {
-              String[] denyAclArray = new String[]{denyAclBuffer.toString()};
-              rd.setDenyACL(denyAclArray);
-            }
+              // Turn into acls and add into description
+              String[] aclArray = new String[acls.size()];
+              for (int j = 0; j < aclArray.length; j++) {
+                aclArray[j] = (String)acls.get(j);
+              }
+              rd.setACL(aclArray);
+              if (denyAclBuffer.length() > 0) {
+                String[] denyAclArray = new String[]{denyAclBuffer.toString()};
+                rd.setDenyACL(denyAclArray);
+              }
 
-            // Now do standard stuff
-            String mimeType = googleFile.getMimeType();
-            DateTime createdDate = googleFile.getCreatedDate();
-            DateTime modifiedDate = googleFile.getModifiedDate();
-            String extension = googleFile.getFileExtension();
-            String title = googleFile.getTitle();
-            
-            if (mimeType != null)
-              rd.setMimeType(mimeType);
-            if (createdDate != null)
-              rd.setCreatedDate(new Date(createdDate.getValue()));
-            if (modifiedDate != null)
-              rd.setModifiedDate(new Date(modifiedDate.getValue()));
-            if (extension != null)
-            {
-              if (title == null)
-                title = "";
-              rd.setFileName(title + "." + extension);
-            }
+              // Now do standard stuff
+              String mimeType = googleFile.getMimeType();
+              DateTime createdDate = googleFile.getCreatedDate();
+              DateTime modifiedDate = googleFile.getModifiedDate();
+              String extension = googleFile.getFileExtension();
+              String title = googleFile.getTitle();
+              
+              if (mimeType != null)
+                rd.setMimeType(mimeType);
+              if (createdDate != null)
+                rd.setCreatedDate(new Date(createdDate.getValue()));
+              if (modifiedDate != null)
+                rd.setModifiedDate(new Date(modifiedDate.getValue()));
+              if (extension != null)
+              {
+                if (title == null)
+                  title = "";
+                rd.setFileName(title + "." + extension);
+              }
 
-            // Get general document metadata
-            for (Entry<String, Object> entry : googleFile.entrySet()) {
-              rd.addField(entry.getKey(), entry.getValue().toString());
-            }
+              // Get general document metadata
+              for (Entry<String, Object> entry : googleFile.entrySet()) {
+                rd.addField(entry.getKey(), entry.getValue().toString());
+              }
 
-            // Fire up the document reading thread
-            DocumentReadingThread t = new DocumentReadingThread(documentURI);
-            try {
-              t.start();
-              boolean wasInterrupted = false;
+              // Fire up the document reading thread
+              DocumentReadingThread t = new DocumentReadingThread(documentURI);
               try {
-                InputStream is = t.getSafeInputStream();
+                t.start();
+                boolean wasInterrupted = false;
                 try {
-                  // Can only index while background thread is running!
-                  rd.setBinary(is, fileLength);
-                  activities.ingestDocument(nodeId, version, documentURI, rd);
+                  InputStream is = t.getSafeInputStream();
+                  try {
+                    // Can only index while background thread is running!
+                    rd.setBinary(is, fileLength);
+                    activities.ingestDocument(nodeId, version, documentURI, rd);
+                  } finally {
+                    is.close();
+                  }
+                } catch (ManifoldCFException e) {
+                  if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+                    wasInterrupted = true;
+                  throw e;
+                } catch (java.net.SocketTimeoutException e) {
+                  throw e;
+                } catch (InterruptedIOException e) {
+                  wasInterrupted = true;
+                  throw e;
                 } finally {
-                  is.close();
+                  if (!wasInterrupted)
+                    t.finishUp();
                 }
-              } catch (ManifoldCFException e) {
-                if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
-                  wasInterrupted = true;
-                throw e;
+
+                // No errors.  Record the fact that we made it.
+                errorCode = "OK";
+                fileSize = new Long(fileLength);
+              } catch (InterruptedException e) {
+                t.interrupt();
+                throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+                  ManifoldCFException.INTERRUPTED);
               } catch (java.net.SocketTimeoutException e) {
-                throw e;
+                Logging.connectors.warn("GOOGLEDRIVE: Socket timeout reading document: " + e.getMessage(), e);
+                handleIOException(e);
               } catch (InterruptedIOException e) {
-                wasInterrupted = true;
-                throw e;
-              } finally {
-                if (!wasInterrupted)
-                  t.finishUp();
+                t.interrupt();
+                throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+                  ManifoldCFException.INTERRUPTED);
+              } catch (IOException e) {
+                errorCode = "IO ERROR";
+                errorDesc = e.getMessage();
+                Logging.connectors.warn("GOOGLEDRIVE: Error reading document: " + e.getMessage(), e);
+                handleIOException(e);
               }
-
-              // No errors.  Record the fact that we made it.
-              errorCode = "OK";
-              fileSize = new Long(fileLength);
-            } catch (InterruptedException e) {
-              t.interrupt();
-              throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
-                ManifoldCFException.INTERRUPTED);
-            } catch (java.net.SocketTimeoutException e) {
-              Logging.connectors.warn("GOOGLEDRIVE: Socket timeout reading document: " + e.getMessage(), e);
-              handleIOException(e);
-            } catch (InterruptedIOException e) {
-              t.interrupt();
-              throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
-                ManifoldCFException.INTERRUPTED);
-            } catch (IOException e) {
-              errorCode = "IO ERROR";
-              errorDesc = e.getMessage();
-              Logging.connectors.warn("GOOGLEDRIVE: Error reading document: " + e.getMessage(), e);
-              handleIOException(e);
+            } else {
+              errorCode = "NO LENGTH";
+              errorDesc = "Document "+nodeId+" had no length; skipping";
             }
           }
         }