You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/08 15:14:51 UTC

svn commit: r1630096 - /manifoldcf/branches/CONNECTORS-1067/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java

Author: kwright
Date: Wed Oct  8 13:14:50 2014
New Revision: 1630096

URL: http://svn.apache.org/r1630096
Log:
Update jcifs connector

Modified:
    manifoldcf/branches/CONNECTORS-1067/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java

Modified: manifoldcf/branches/CONNECTORS-1067/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java?rev=1630096&r1=1630095&r2=1630096&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java Wed Oct  8 13:14:50 2014
@@ -778,131 +778,189 @@ public class SharedDriveConnector extend
               if (fileName != null && !file.isHidden())
               {
                 String uri = ingestionURI;
+                String fileNameString = file.getName();
+                Date lastModifiedDate = new Date(file.lastModified());
+                Date creationDate = new Date(file.createTime());
+                String contentType = mapExtensionToMimeType(fileNameString);
 
-                if (activities.checkURLIndexable(uri))
+                if (!activities.checkURLIndexable(uri))
                 {
-                  // Initialize repository document with common stuff, and find the URI
-                  RepositoryDocument rd = new RepositoryDocument();
-                  prepareForIndexing(rd,file,
-                    shareAllow,shareDeny,
-                    parentAllow,parentDeny,
-                    documentAllow,documentDeny,
-                    pathAttributeName,pathAttributeValue);
-
-                  // manipulate path to include the DFS alias, not the literal path
-                  // String newPath = matchPrefix + fileName.substring(matchReplace.length());
-                  String newPath = fileName;
-                  if (checkNeedFileData(newPath, spec))
-                  {
-                    if (Logging.connectors.isDebugEnabled())
-                      Logging.connectors.debug("JCIFS: Local file data needed for '"+documentIdentifier+"'");
+                  Logging.connectors.debug("JCIFS: Skipping file because output connector cannot accept URL ('"+uri+"')");
+                  activities.recordActivity(null,ACTIVITY_ACCESS,
+                    null,documentIdentifier,"Skip","Output connector refused URL",null);
+                  activities.noDocument(documentIdentifier,versionString);
+                  continue;
+                }
+
+                if (!activities.checkMimeTypeIndexable(contentType))
+                {
+                  Logging.connectors.debug("JCIFS: Skipping file because output connector cannot accept content type ('"+contentType+"')");
+                  activities.recordActivity(null,ACTIVITY_ACCESS,
+                    null,documentIdentifier,"Skip","Output connector refused mime type",null);
+                  activities.noDocument(documentIdentifier,versionString);
+                  continue;
+                }
 
-                    // Create a temporary file, and use that for the check and then the ingest
-                    File tempFile = File.createTempFile("_sdc_",null);
+                if (!activities.checkDateIndexable(lastModifiedDate))
+                {
+                  Logging.connectors.debug("JCIFS: Skipping file because output connector cannot accept date ("+lastModifiedDate+")");
+                  activities.recordActivity(null,ACTIVITY_ACCESS,
+                    null,documentIdentifier,"Skip","Output connector refused date",null);
+                  activities.noDocument(documentIdentifier,versionString);
+                  continue;
+                }
+
+                // Initialize repository document with common stuff, and find the URI
+                RepositoryDocument rd = new RepositoryDocument();
+                
+                //If using the lastAccess patched/Google version of jcifs then this can be uncommented
+                //Date lastAccessDate = new Date(file.lastAccess());
+                Integer attributes = file.getAttributes();
+                String shareName = file.getShare();
+
+                rd.setFileName(fileNameString);
+                if (contentType != null)
+                  rd.setMimeType(contentType);
+                rd.addField("lastModified", lastModifiedDate.toString());
+                rd.setModifiedDate(lastModifiedDate);
+                
+                // Add extra obtainable fields to the field map
+                rd.addField("createdOn", creationDate.toString());
+                rd.setCreatedDate(creationDate);
+
+                //rd.addField("lastAccess", lastModifiedDate.toString());
+                rd.addField("attributes", Integer.toString(attributes));
+                rd.addField("shareName", shareName);
+
+                setDocumentSecurity(rd,shareAllow,shareDeny,parentAllow,parentDeny,documentAllow,documentDeny);
+                setPathMetadata(rd,pathAttributeName,pathAttributeValue);
+
+                // manipulate path to include the DFS alias, not the literal path
+                // String newPath = matchPrefix + fileName.substring(matchReplace.length());
+                String newPath = fileName;
+                if (checkNeedFileData(newPath, spec))
+                {
+                  if (Logging.connectors.isDebugEnabled())
+                    Logging.connectors.debug("JCIFS: Local file data needed for '"+documentIdentifier+"'");
+
+                  // Create a temporary file, and use that for the check and then the ingest
+                  File tempFile = File.createTempFile("_sdc_",null);
+                  try
+                  {
+                    FileOutputStream os = new FileOutputStream(tempFile);
                     try
                     {
-                      FileOutputStream os = new FileOutputStream(tempFile);
+
+                      // Now, make a local copy so we can fingerprint
+                      InputStream inputStream = getFileInputStream(file);
                       try
                       {
-
-                        // Now, make a local copy so we can fingerprint
-                        InputStream inputStream = getFileInputStream(file);
-                        try
+                        // Copy!
+                        if (transferBuffer == null)
+                          transferBuffer = new byte[65536];
+                        while (true)
                         {
-                          // Copy!
-                          if (transferBuffer == null)
-                            transferBuffer = new byte[65536];
-                          while (true)
-                          {
-                            int amt = inputStream.read(transferBuffer,0,transferBuffer.length);
-                            if (amt == -1)
-                              break;
-                            os.write(transferBuffer,0,amt);
-                          }
-                        }
-                        finally
-                        {
-                          inputStream.close();
+                          int amt = inputStream.read(transferBuffer,0,transferBuffer.length);
+                          if (amt == -1)
+                            break;
+                          os.write(transferBuffer,0,amt);
                         }
                       }
                       finally
                       {
-                        os.close();
+                        inputStream.close();
                       }
+                    }
+                    finally
+                    {
+                      os.close();
+                    }
 
-                      if (checkIngest(tempFile, newPath, spec, activities))
+                    if (checkIngest(tempFile, newPath, spec, activities))
+                    {
+                      long fileLength = tempFile.length();
+                      if (!activities.checkLengthIndexable(fileLength))
                       {
-                        if (Logging.connectors.isDebugEnabled())
-                          Logging.connectors.debug("JCIFS: Decided to ingest '"+documentIdentifier+"'");
-                        // OK, do ingestion itself!
-                        InputStream inputStream = new FileInputStream(tempFile);
-                        try
-                        {
-                          rd.setBinary(inputStream, tempFile.length());
-                          
-                          activities.ingestDocumentWithException(documentIdentifier, versionString, uri, rd);
-                        }
-                        finally
-                        {
-                          inputStream.close();
-                        }
-
-                        // I put this record here deliberately for two reasons:
-                        // (1) the other path includes ingestion time, and
-                        // (2) if anything fails up to and during ingestion, I want THAT failure record to be written, not this one.
-                        // So, really, ACTIVITY_ACCESS is a bit more than just fetch for JCIFS...
-                        activities.recordActivity(new Long(startFetchTime),ACTIVITY_ACCESS,
-                          new Long(tempFile.length()),documentIdentifier,"Success",null,null);
+                        Logging.connectors.debug("JCIFS: Skipping file because output connector cannot accept length ("+fileLength+")");
+                        activities.recordActivity(null,ACTIVITY_ACCESS,
+                          null,documentIdentifier,"Skip","Output connector refused length",null);
+                        activities.noDocument(documentIdentifier,versionString);
+                        continue;
+                      }
 
+                      if (Logging.connectors.isDebugEnabled())
+                        Logging.connectors.debug("JCIFS: Decided to ingest '"+documentIdentifier+"'");
+                      // OK, do ingestion itself!
+                      InputStream inputStream = new FileInputStream(tempFile);
+                      try
+                      {
+                        rd.setBinary(inputStream, fileLength);
+                          
+                        activities.ingestDocumentWithException(documentIdentifier, versionString, uri, rd);
                       }
-                      else
+                      finally
                       {
-                        // We must actively remove the document here, because the getDocumentVersions()
-                        // method has no way of signalling this, since it does not do the fingerprinting.
-                        if (Logging.connectors.isDebugEnabled())
-                          Logging.connectors.debug("JCIFS: Decided to remove '"+documentIdentifier+"'");
-                        activities.noDocument(documentIdentifier, versionString);
-                        // We should record the access here as well, since this is a non-exception way through the code path.
-                        // (I noticed that this was not being recorded in the history while fixing 25477.)
-                        activities.recordActivity(new Long(startFetchTime),ACTIVITY_ACCESS,
-                          new Long(tempFile.length()),documentIdentifier,"Success",null,null);
+                        inputStream.close();
                       }
-                    }
-                    finally
-                    {
-                      tempFile.delete();
-                    }
-                  }
-                  else
-                  {
-                    if (Logging.connectors.isDebugEnabled())
-                      Logging.connectors.debug("JCIFS: Local file data not needed for '"+documentIdentifier+"'");
 
-                    // Presume that since the file was queued that it fulfilled the needed criteria.
-                    // Go off and ingest the fast way.
+                      // I put this record here deliberately for two reasons:
+                      // (1) the other path includes ingestion time, and
+                      // (2) if anything fails up to and during ingestion, I want THAT failure record to be written, not this one.
+                      // So, really, ACTIVITY_ACCESS is a bit more than just fetch for JCIFS...
+                      activities.recordActivity(new Long(startFetchTime),ACTIVITY_ACCESS,
+                        new Long(tempFile.length()),documentIdentifier,"Success",null,null);
 
-                    // Ingest the document.
-                    InputStream inputStream = getFileInputStream(file);
-                    try
-                    {
-                      rd.setBinary(inputStream, fileLength(file));
-                      
-                      activities.ingestDocumentWithException(documentIdentifier, versionString, uri, rd);
                     }
-                    finally
+                    else
                     {
-                      inputStream.close();
+                      // We must actively remove the document here, because the getDocumentVersions()
+                      // method has no way of signalling this, since it does not do the fingerprinting.
+                      if (Logging.connectors.isDebugEnabled())
+                        Logging.connectors.debug("JCIFS: Decided to remove '"+documentIdentifier+"'");
+                      activities.noDocument(documentIdentifier, versionString);
+                      // We should record the access here as well, since this is a non-exception way through the code path.
+                      // (I noticed that this was not being recorded in the history while fixing 25477.)
+                      activities.recordActivity(new Long(startFetchTime),ACTIVITY_ACCESS,
+                        new Long(tempFile.length()),documentIdentifier,"Success",null,null);
                     }
-                    activities.recordActivity(new Long(startFetchTime),ACTIVITY_ACCESS,
-                      new Long(fileLength(file)),documentIdentifier,"Success",null,null);
+                  }
+                  finally
+                  {
+                    tempFile.delete();
                   }
                 }
                 else
                 {
-                  Logging.connectors.debug("JCIFS: Skipping file because output connector cannot accept it");
-                  activities.recordActivity(null,ACTIVITY_ACCESS,
-                    null,documentIdentifier,"Skip","Output connector refused",null);
-                  activities.noDocument(documentIdentifier,versionString);
+                  if (Logging.connectors.isDebugEnabled())
+                    Logging.connectors.debug("JCIFS: Local file data not needed for '"+documentIdentifier+"'");
+
+                  long fileLength = fileLength(file);
+                  if (!activities.checkLengthIndexable(fileLength))
+                  {
+                    Logging.connectors.debug("JCIFS: Skipping file because output connector cannot accept length ("+fileLength+")");
+                    activities.recordActivity(null,ACTIVITY_ACCESS,
+                      null,documentIdentifier,"Skip","Output connector refused length",null);
+                    activities.noDocument(documentIdentifier,versionString);
+                    continue;
+                  }
+
+                  // Presume that since the file was queued that it fulfilled the needed criteria.
+                  // Go off and ingest the fast way.
+                  
+                  // Ingest the document.
+                  InputStream inputStream = getFileInputStream(file);
+                  try
+                  {
+                    rd.setBinary(inputStream, fileLength);
+                      
+                    activities.ingestDocumentWithException(documentIdentifier, versionString, uri, rd);
+                  }
+                  finally
+                  {
+                    inputStream.close();
+                  }
+                  activities.recordActivity(new Long(startFetchTime),ACTIVITY_ACCESS,
+                    new Long(fileLength(file)),documentIdentifier,"Success",null,null);
                 }
               }
               else
@@ -1032,40 +1090,6 @@ public class SharedDriveConnector extend
   }
 
 
-  protected static void prepareForIndexing(RepositoryDocument rd, SmbFile file,
-    String[] shareAllow, String[] shareDeny, String[] parentAllow, String[] parentDeny, String[] allow, String[] deny,
-    String pathAttributeName, String pathAttributeValue)
-    throws ManifoldCFException, SmbException
-  {
-    String fileNameString = file.getName();
-    Date lastModifiedDate = new Date(file.lastModified());
-    Date creationDate = new Date(file.createTime());
-    //If using the lastAccess patched/Google version of jcifs then this can be uncommented
-    //Date lastAccessDate = new Date(file.lastAccess());
-    Integer attributes = file.getAttributes();
-    String shareName = file.getShare();
-
-    
-    String contentType = mapExtensionToMimeType(fileNameString);
-
-    rd.setFileName(fileNameString);
-    if (contentType != null)
-      rd.setMimeType(contentType);
-    rd.addField("lastModified", lastModifiedDate.toString());
-    rd.setModifiedDate(lastModifiedDate);
-    
-    // Add extra obtainable fields to the field map
-    rd.addField("createdOn", creationDate.toString());
-    rd.setCreatedDate(creationDate);
-
-    //rd.addField("lastAccess", lastModifiedDate.toString());
-    rd.addField("attributes", Integer.toString(attributes));
-    rd.addField("shareName", shareName);
-
-    setDocumentSecurity(rd,shareAllow,shareDeny,parentAllow,parentDeny,allow,deny);
-    setPathMetadata(rd,pathAttributeName,pathAttributeValue);
-  }
-  
   /** Map an extension to a mime type */
   protected static String mapExtensionToMimeType(String fileName)
   {