You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/08 14:50:24 UTC
svn commit: r1630091 - /manifoldcf/branches/CONNECTORS-1067/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java
Author: kwright
Date: Wed Oct 8 12:50:24 2014
New Revision: 1630091
URL: http://svn.apache.org/r1630091
Log:
Update hdfs connector
Modified:
manifoldcf/branches/CONNECTORS-1067/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java
Modified: manifoldcf/branches/CONNECTORS-1067/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java?rev=1630091&r1=1630090&r2=1630091&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java Wed Oct 8 12:50:24 2014
@@ -386,27 +386,49 @@ public class HDFSRepositoryConnector ext
continue;
}
+ // It is a file to be indexed.
long fileLength = fileStatus.getLen();
- if (!activities.checkLengthIndexable(fileLength)) {
+ String fileName = fileStatus.getPath().getName();
+ String mimeType = mapExtensionToMimeType(fileStatus.getPath().getName());
+ Date modifiedDate = new Date(fileStatus.getModificationTime());
+ String uri;
+ if (convertPath != null) {
+ uri = convertToWGETURI(convertPath);
+ } else {
+ uri = fileStatus.getPath().toUri().toString();
+ }
+
+ if (!activities.checkLengthIndexable(fileLength))
+ {
+ activities.noDocument(documentIdentifier,versionString);
+ continue;
+ }
+
+ if (!activities.checkURLIndexable(uri))
+ {
+ activities.noDocument(documentIdentifier,versionString);
+ continue;
+ }
+
+ if (!activities.checkMimeTypeIndexable(mimeType))
+ {
+ activities.noDocument(documentIdentifier,versionString);
+ continue;
+ }
+
+ if (!activities.checkDateIndexable(modifiedDate))
+ {
activities.noDocument(documentIdentifier,versionString);
continue;
}
-
- // It is a file to be indexed.
// Prepare the metadata part of RepositoryDocument
RepositoryDocument data = new RepositoryDocument();
- data.setFileName(fileStatus.getPath().getName());
- data.setMimeType(mapExtensionToMimeType(fileStatus.getPath().getName()));
- data.setModifiedDate(new Date(fileStatus.getModificationTime()));
+ data.setFileName(fileName);
+ data.setMimeType(mimeType);
+ data.setModifiedDate(modifiedDate);
- String uri;
- if (convertPath != null) {
- uri = convertToWGETURI(convertPath);
- } else {
- uri = fileStatus.getPath().toUri().toString();
- }
data.addField("uri",uri);
// We will record document fetch as an activity