You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/08 14:43:55 UTC
svn commit: r1630087 -
/manifoldcf/branches/CONNECTORS-1067/connectors/gridfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/gridfs/GridFSRepositoryConnector.java
Author: kwright
Date: Wed Oct 8 12:43:54 2014
New Revision: 1630087
URL: http://svn.apache.org/r1630087
Log:
Hook up gridfs connector
Modified:
manifoldcf/branches/CONNECTORS-1067/connectors/gridfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/gridfs/GridFSRepositoryConnector.java
Modified: manifoldcf/branches/CONNECTORS-1067/connectors/gridfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/gridfs/GridFSRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/connectors/gridfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/gridfs/GridFSRepositoryConnector.java?rev=1630087&r1=1630086&r2=1630087&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/connectors/gridfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/gridfs/GridFSRepositoryConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/connectors/gridfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/gridfs/GridFSRepositoryConnector.java Wed Oct 8 12:43:54 2014
@@ -415,8 +415,6 @@ public class GridFSRepositoryConnector e
String errorDesc = null;
String version = versionString;
- RepositoryDocument rd = new RepositoryDocument();
-
if (Logging.connectors.isDebugEnabled()) {
Logging.connectors.debug("GridFS: Processing document _id = " + _id);
}
@@ -441,44 +439,77 @@ public class GridFSRepositoryConnector e
}
if (validURL) {
long fileLenght = document.getLength();
- InputStream is = document.getInputStream();
- try {
- Date indexingDate = new Date();
- rd.setBinary(is, fileLenght);
- rd.setCreatedDate(document.getUploadDate());
- rd.setFileName(document.getFilename());
- rd.setIndexingDate(indexingDate);
- rd.setMimeType(document.getContentType());
- String[] aclsArray = null;
- String[] denyAclsArray = null;
- if (acl != null) {
- try {
- Object aclObject = document.getMetaData().get(acl);
- if (aclObject != null) {
- List<String> acls = (List<String>) aclObject;
- aclsArray = (String[]) acls.toArray();
- }
- } catch (ClassCastException e) {
- // This is bad because security will fail
- Logging.connectors.warn("GridFS: Document " + _id + " metadata ACL field doesn't contain List<String> type.");
- throw new ManifoldCFException("Security decoding error: "+e.getMessage(),e);
+ Date createdDate = document.getUploadDate();
+ String fileName = document.getFilename();
+ String mimeType = document.getContentType();
+
+ if (!activities.checkURLIndexable(urlValue))
+ {
+ Logging.connectors.warn("GridFS: Document " + _id + " has a URL excluded by the output connector ('" + urlValue + "') - skipping.");
+ activities.noDocument(_id, version);
+ continue;
+ }
+
+ if (!activities.checkLengthIndexable(fileLenght))
+ {
+ Logging.connectors.warn("GridFS: Document " + _id + " has a length excluded by the output connector (" + fileLenght + ") - skipping.");
+ activities.noDocument(_id, version);
+ continue;
+ }
+
+ if (!activities.checkMimeTypeIndexable(mimeType))
+ {
+ Logging.connectors.warn("GridFS: Document " + _id + " has a mime type excluded by the output connector ('" + mimeType + "') - skipping.");
+ activities.noDocument(_id, version);
+ continue;
+ }
+
+ if (!activities.checkDateIndexable(createdDate))
+ {
+ Logging.connectors.warn("GridFS: Document " + _id + " has a date excluded by the output connector (" + createdDate + ") - skipping.");
+ activities.noDocument(_id, version);
+ continue;
+ }
+
+ RepositoryDocument rd = new RepositoryDocument();
+ rd.setCreatedDate(createdDate);
+ rd.setModifiedDate(createdDate);
+ rd.setFileName(fileName);
+ rd.setMimeType(mimeType);
+ String[] aclsArray = null;
+ String[] denyAclsArray = null;
+ if (acl != null) {
+ try {
+ Object aclObject = document.getMetaData().get(acl);
+ if (aclObject != null) {
+ List<String> acls = (List<String>) aclObject;
+ aclsArray = (String[]) acls.toArray();
}
+ } catch (ClassCastException e) {
+ // This is bad because security will fail
+ Logging.connectors.warn("GridFS: Document " + _id + " metadata ACL field doesn't contain List<String> type.");
+ throw new ManifoldCFException("Security decoding error: "+e.getMessage(),e);
}
- if (denyAcl != null) {
- try {
- Object denyAclObject = document.getMetaData().get(denyAcl);
- if (denyAclObject != null) {
- List<String> denyAcls = (List<String>) denyAclObject;
- denyAcls.add(GLOBAL_DENY_TOKEN);
- denyAclsArray = (String[]) denyAcls.toArray();
- }
- } catch (ClassCastException e) {
- // This is bad because security will fail
- Logging.connectors.warn("GridFS: Document " + _id + " metadata DenyACL field doesn't contain List<String> type.");
- throw new ManifoldCFException("Security decoding error: "+e.getMessage(),e);
+ }
+ if (denyAcl != null) {
+ try {
+ Object denyAclObject = document.getMetaData().get(denyAcl);
+ if (denyAclObject != null) {
+ List<String> denyAcls = (List<String>) denyAclObject;
+ denyAcls.add(GLOBAL_DENY_TOKEN);
+ denyAclsArray = (String[]) denyAcls.toArray();
}
+ } catch (ClassCastException e) {
+ // This is bad because security will fail
+ Logging.connectors.warn("GridFS: Document " + _id + " metadata DenyACL field doesn't contain List<String> type.");
+ throw new ManifoldCFException("Security decoding error: "+e.getMessage(),e);
}
- rd.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT,aclsArray,denyAclsArray);
+ }
+ rd.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT,aclsArray,denyAclsArray);
+
+ InputStream is = document.getInputStream();
+ try {
+ rd.setBinary(is, fileLenght);
try {
activities.ingestDocumentWithException(_id, version, urlValue, rd);
} catch (IOException e) {