You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/08 14:50:24 UTC

svn commit: r1630091 - /manifoldcf/branches/CONNECTORS-1067/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java

Author: kwright
Date: Wed Oct  8 12:50:24 2014
New Revision: 1630091

URL: http://svn.apache.org/r1630091
Log:
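Update HDFS connector: check length, URL, MIME type, and modified-date indexability before building the RepositoryDocument.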

Modified:
    manifoldcf/branches/CONNECTORS-1067/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java

Modified: manifoldcf/branches/CONNECTORS-1067/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java?rev=1630091&r1=1630090&r2=1630091&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java Wed Oct  8 12:50:24 2014
@@ -386,27 +386,49 @@ public class HDFSRepositoryConnector ext
               continue;
             }
 
+            // It is a file to be indexed.
             long fileLength = fileStatus.getLen();
-            if (!activities.checkLengthIndexable(fileLength)) {
+            String fileName = fileStatus.getPath().getName();
+            String mimeType = mapExtensionToMimeType(fileStatus.getPath().getName());
+            Date modifiedDate = new Date(fileStatus.getModificationTime());
+            String uri;
+            if (convertPath != null) {
+              uri = convertToWGETURI(convertPath);
+            } else {
+              uri = fileStatus.getPath().toUri().toString();
+            }
+            
+            if (!activities.checkLengthIndexable(fileLength))
+            {
+              activities.noDocument(documentIdentifier,versionString);
+              continue;
+            }
+            
+            if (!activities.checkURLIndexable(uri))
+            {
+              activities.noDocument(documentIdentifier,versionString);
+              continue;
+            }
+            
+            if (!activities.checkMimeTypeIndexable(mimeType))
+            {
+              activities.noDocument(documentIdentifier,versionString);
+              continue;
+            }
+            
+            if (!activities.checkDateIndexable(modifiedDate))
+            {
               activities.noDocument(documentIdentifier,versionString);
               continue;
             }
-
-            // It is a file to be indexed.
             
             // Prepare the metadata part of RepositoryDocument
             RepositoryDocument data = new RepositoryDocument();
 
-            data.setFileName(fileStatus.getPath().getName());
-            data.setMimeType(mapExtensionToMimeType(fileStatus.getPath().getName()));
-            data.setModifiedDate(new Date(fileStatus.getModificationTime()));
+            data.setFileName(fileName);
+            data.setMimeType(mimeType);
+            data.setModifiedDate(modifiedDate);
 
-            String uri;
-            if (convertPath != null) {
-              uri = convertToWGETURI(convertPath);
-            } else {
-              uri = fileStatus.getPath().toUri().toString();
-            }
             data.addField("uri",uri);
 
             // We will record document fetch as an activity