You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/21 12:16:35 UTC

svn commit: r1633337 - in /manifoldcf/branches/dev_1x: ./ connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java

Author: kwright
Date: Tue Oct 21 10:16:35 2014
New Revision: 1633337

URL: http://svn.apache.org/r1633337
Log:
Pull up more CONNECTORS-1077 fixes from trunk

Modified:
    manifoldcf/branches/dev_1x/   (props changed)
    manifoldcf/branches/dev_1x/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java

Propchange: manifoldcf/branches/dev_1x/
------------------------------------------------------------------------------
  Merged /manifoldcf/trunk:r1633336

Modified: manifoldcf/branches/dev_1x/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java?rev=1633337&r1=1633336&r2=1633337&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java (original)
+++ manifoldcf/branches/dev_1x/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java Tue Oct 21 10:16:35 2014
@@ -1099,7 +1099,6 @@ public class CmisRepositoryConnector ext
       }
     }
 
-    getSession();
 
     for (String documentIdentifier : documentIdentifiers) {
       
@@ -1107,6 +1106,8 @@ public class CmisRepositoryConnector ext
         Logging.connectors.debug("CMIS: Processing document identifier '"
             + documentIdentifier + "'");
 
+      getSession();
+      
       // Load the object.  If this fails, it has been deleted.
       CmisObject cmisObject;
       try {
@@ -1151,214 +1152,236 @@ public class CmisRepositoryConnector ext
       
       if (versionString.length() == 0 || activities.checkDocumentNeedsReindexing(documentIdentifier,versionString)) {
         // Index this document
-        String errorCode = "OK";
-        String errorDesc = StringUtils.EMPTY;
+        String errorCode = null;
+        String errorDesc = null;
+        Long fileLengthLong = null;
         long startTime = System.currentTimeMillis();
-        
-        String baseTypeId = cmisObject.getBaseType().getId();
-
-        if (baseTypeId.equals(CMIS_FOLDER_BASE_TYPE)) {
+        try {
+          String baseTypeId = cmisObject.getBaseType().getId();
 
-          // adding all the children for a folder
+          if (baseTypeId.equals(CMIS_FOLDER_BASE_TYPE)) {
 
-          Folder folder = (Folder) cmisObject;
-          ItemIterable<CmisObject> children = folder.getChildren();
-          for (CmisObject child : children) {
-            activities.addDocumentReference(child.getId(), documentIdentifier,
-                RELATIONSHIP_CHILD);
-          }
-        } else if(baseTypeId.equals(CMIS_DOCUMENT_BASE_TYPE)) {
-          // content ingestion
+            // adding all the children for a folder
 
-          Document document = (Document) cmisObject;
-          
-          Date createdDate = document.getCreationDate().getTime();
-          Date modifiedDate = document.getLastModificationDate().getTime();
-          long fileLength = document.getContentStreamLength();
-          String fileName = document.getContentStreamFileName();
-          String mimeType = document.getContentStreamMimeType();
-          //documentURI
-          String documentURI = CmisRepositoryConnectorUtils.getDocumentURL(document, session);
-          
-          // Do any filtering (which will save us work)
-          if (!activities.checkURLIndexable(documentURI))
-          {
-            activities.noDocument(documentIdentifier,versionString);
-            continue;
-          }
-          
-          if (!activities.checkMimeTypeIndexable(mimeType))
-          {
-            activities.noDocument(documentIdentifier,versionString);
-            continue;
-          }
+            Folder folder = (Folder) cmisObject;
+            ItemIterable<CmisObject> children = folder.getChildren();
+            for (CmisObject child : children) {
+              activities.addDocumentReference(child.getId(), documentIdentifier,
+                  RELATIONSHIP_CHILD);
+            }
+          } else if(baseTypeId.equals(CMIS_DOCUMENT_BASE_TYPE)) {
+            // content ingestion
 
-          if (!activities.checkLengthIndexable(fileLength))
-          {
-            activities.noDocument(documentIdentifier,versionString);
-            continue;
-          }
-          
-          if (!activities.checkDateIndexable(modifiedDate))
-          {
-            activities.noDocument(documentIdentifier,versionString);
-            continue;
-          }
-          
-          RepositoryDocument rd = new RepositoryDocument();
-          rd.setFileName(fileName);
-          rd.setMimeType(mimeType);
-          rd.setCreatedDate(createdDate);
-          rd.setModifiedDate(modifiedDate);
-              
-          InputStream is;
-          try {
-            if (fileLength > 0)
-              is = document.getContentStream().getStream();
-            else
-              is = null;
-          } catch (CmisObjectNotFoundException e) {
-            // Document gone
-            activities.deleteDocument(documentIdentifier);
-            continue;
-          }
+            Document document = (Document) cmisObject;
             
-          try {
-            //binary
-            if(is != null) {
-              rd.setBinary(is, fileLength);
-            } else {
-              rd.setBinary(new NullInputStream(0),0);
+            Date createdDate = document.getCreationDate().getTime();
+            Date modifiedDate = document.getLastModificationDate().getTime();
+            long fileLength = document.getContentStreamLength();
+            String fileName = document.getContentStreamFileName();
+            String mimeType = document.getContentStreamMimeType();
+            //documentURI
+            String documentURI = CmisRepositoryConnectorUtils.getDocumentURL(document, session);
+            
+            // Do any filtering (which will save us work)
+            if (!activities.checkURLIndexable(documentURI))
+            {
+              activities.noDocument(documentIdentifier,versionString);
+              errorCode = activities.EXCLUDED_URL;
+              errorDesc = "Excluding due to URL ('"+documentURI+"')";
+              continue;
+            }
+            
+            if (!activities.checkMimeTypeIndexable(mimeType))
+            {
+              activities.noDocument(documentIdentifier,versionString);
+              errorCode = activities.EXCLUDED_MIMETYPE;
+              errorDesc = "Excluding due to mime type ("+mimeType+")";
+              continue;
             }
 
-            //properties
-            List<Property<?>> properties = document.getProperties();
-            String id = StringUtils.EMPTY;
-            for (Property<?> property : properties) {
-              String propertyId = property.getId();
+            if (!activities.checkLengthIndexable(fileLength))
+            {
+              activities.noDocument(documentIdentifier,versionString);
+              errorCode = activities.EXCLUDED_LENGTH;
+              errorDesc = "Excluding due to length ("+fileLength+")";
+              continue;
+            }
+            
+            if (!activities.checkDateIndexable(modifiedDate))
+            {
+              activities.noDocument(documentIdentifier,versionString);
+              errorCode = activities.EXCLUDED_DATE;
+              errorDesc = "Excluding due to date ("+modifiedDate+")";
+              continue;
+            }
+            
+            RepositoryDocument rd = new RepositoryDocument();
+            rd.setFileName(fileName);
+            rd.setMimeType(mimeType);
+            rd.setCreatedDate(createdDate);
+            rd.setModifiedDate(modifiedDate);
                 
-              if(CmisRepositoryConnectorUtils.existsInSelectClause(cmisQuery, propertyId)){
+            InputStream is;
+            try {
+              if (fileLength > 0)
+                is = document.getContentStream().getStream();
+              else
+                is = null;
+            } catch (CmisObjectNotFoundException e) {
+              // Document gone
+              activities.deleteDocument(documentIdentifier);
+              continue;
+            }
+              
+            try {
+              //binary
+              if(is != null) {
+                rd.setBinary(is, fileLength);
+              } else {
+                rd.setBinary(new NullInputStream(0),0);
+              }
+
+              //properties
+              List<Property<?>> properties = document.getProperties();
+              String id = StringUtils.EMPTY;
+              for (Property<?> property : properties) {
+                String propertyId = property.getId();
                   
-                if (propertyId.endsWith(Constants.PARAM_OBJECT_ID)) {
-                  id = (String) property.getValue();
-      
-                  if (property.getValue() !=null 
-                      || property.getValues() != null) {
-                    PropertyType propertyType = property.getType();
-        
-                    switch (propertyType) {
-        
-                    case STRING:
-                    case ID:
-                    case URI:
-                    case HTML:
-                      if(property.isMultiValued()){
-                        List<String> htmlPropertyValues = (List<String>) property.getValues();
-                        for (String htmlPropertyValue : htmlPropertyValues) {
-                          rd.addField(propertyId, htmlPropertyValue);
-                        }
-                      } else {
-                        String stringValue = (String) property.getValue();
-                        if(StringUtils.isNotEmpty(stringValue)){
-                          rd.addField(propertyId, stringValue);
-                        }
-                      }
-                      break;
-             
-                    case BOOLEAN:
-                      if(property.isMultiValued()){
-                        List<Boolean> booleanPropertyValues = (List<Boolean>) property.getValues();
-                        for (Boolean booleanPropertyValue : booleanPropertyValues) {
-                          rd.addField(propertyId, booleanPropertyValue.toString());
-                        }
-                      } else {
-                        Boolean booleanValue = (Boolean) property.getValue();
-                        if(booleanValue!=null){
-                          rd.addField(propertyId, booleanValue.toString());
-                        }
-                      }
-                      break;
+                if(CmisRepositoryConnectorUtils.existsInSelectClause(cmisQuery, propertyId)){
+                    
+                  if (propertyId.endsWith(Constants.PARAM_OBJECT_ID)) {
+                    id = (String) property.getValue();
         
-                    case INTEGER:
-                      if(property.isMultiValued()){
-                        List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
-                        for (BigInteger integerPropertyValue : integerPropertyValues) {
-                          rd.addField(propertyId, integerPropertyValue.toString());
-                        }
-                      } else {
-                        BigInteger integerValue = (BigInteger) property.getValue();
-                        if(integerValue!=null){
-                          rd.addField(propertyId, integerValue.toString());
+                    if (property.getValue() !=null 
+                        || property.getValues() != null) {
+                      PropertyType propertyType = property.getType();
+          
+                      switch (propertyType) {
+          
+                      case STRING:
+                      case ID:
+                      case URI:
+                      case HTML:
+                        if(property.isMultiValued()){
+                          List<String> htmlPropertyValues = (List<String>) property.getValues();
+                          for (String htmlPropertyValue : htmlPropertyValues) {
+                            rd.addField(propertyId, htmlPropertyValue);
+                          }
+                        } else {
+                          String stringValue = (String) property.getValue();
+                          if(StringUtils.isNotEmpty(stringValue)){
+                            rd.addField(propertyId, stringValue);
+                          }
                         }
-                      }
-                      break;
-        
-                    case DECIMAL:
-                      if(property.isMultiValued()){
-                        List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
-                        for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
-                          rd.addField(propertyId, decimalPropertyValue.toString());
+                        break;
+               
+                      case BOOLEAN:
+                        if(property.isMultiValued()){
+                          List<Boolean> booleanPropertyValues = (List<Boolean>) property.getValues();
+                          for (Boolean booleanPropertyValue : booleanPropertyValues) {
+                            rd.addField(propertyId, booleanPropertyValue.toString());
+                          }
+                        } else {
+                          Boolean booleanValue = (Boolean) property.getValue();
+                          if(booleanValue!=null){
+                            rd.addField(propertyId, booleanValue.toString());
+                          }
                         }
-                      } else {
-                        BigDecimal decimalValue = (BigDecimal) property.getValue();
-                        if(decimalValue!=null){
-                          rd.addField(propertyId, decimalValue.toString());
+                        break;
+          
+                      case INTEGER:
+                        if(property.isMultiValued()){
+                          List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
+                          for (BigInteger integerPropertyValue : integerPropertyValues) {
+                            rd.addField(propertyId, integerPropertyValue.toString());
+                          }
+                        } else {
+                          BigInteger integerValue = (BigInteger) property.getValue();
+                          if(integerValue!=null){
+                            rd.addField(propertyId, integerValue.toString());
+                          }
                         }
-                      }
-                      break;
-        
-                    case DATETIME:
-                      if(property.isMultiValued()){
-                        List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
-                        for (GregorianCalendar datePropertyValue : datePropertyValues) {
-                          rd.addField(propertyId,
-                              ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
+                        break;
+          
+                      case DECIMAL:
+                        if(property.isMultiValued()){
+                          List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
+                          for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
+                            rd.addField(propertyId, decimalPropertyValue.toString());
+                          }
+                        } else {
+                          BigDecimal decimalValue = (BigDecimal) property.getValue();
+                          if(decimalValue!=null){
+                            rd.addField(propertyId, decimalValue.toString());
+                          }
                         }
-                      } else {
-                        GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
-                        if(dateValue!=null){
-                          rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
+                        break;
+          
+                      case DATETIME:
+                        if(property.isMultiValued()){
+                          List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
+                          for (GregorianCalendar datePropertyValue : datePropertyValues) {
+                            rd.addField(propertyId,
+                                ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
+                          }
+                        } else {
+                          GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
+                          if(dateValue!=null){
+                            rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
+                          }
                         }
+                        break;
+          
+                      default:
+                        break;
                       }
-                      break;
-        
-                    default:
-                      break;
                     }
+                      
                   }
-                    
+                  
                 }
-                
               }
-            }
-            
-            //ingestion
-              
               
-            try {
-              activities.ingestDocumentWithException(documentIdentifier, versionString, documentURI, rd);
-            } catch (IOException e) {
-              errorCode = "IO ERROR";
-              errorDesc = e.getMessage();
-              handleIOException(e, "reading file input stream");
-            }
-          } finally {
-            try {
-              if(is!=null){
-                is.close();
+              //ingestion
+                
+                
+              try {
+                activities.ingestDocumentWithException(documentIdentifier, versionString, documentURI, rd);
+                fileLengthLong = new Long(fileLength);
+                errorCode = "OK";
+              } catch (IOException e) {
+                if (e instanceof InterruptedIOException && !(e instanceof java.net.SocketTimeoutException))
+                  errorCode = null;
+                else
+                  errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+                errorDesc = e.getMessage();
+                handleIOException(e, "reading file input stream");
               }
-            } catch (IOException e) {
-              errorCode = "IO ERROR";
-              errorDesc = e.getMessage();
-              handleIOException(e, "closing file input stream");
             } finally {
-              activities.recordActivity(new Long(startTime), ACTIVITY_READ,
-                fileLength, documentIdentifier, errorCode, errorDesc, null);
+              try {
+                if(is!=null){
+                  is.close();
+                }
+              } catch (IOException e) {
+                if (e instanceof InterruptedIOException && !(e instanceof java.net.SocketTimeoutException))
+                  errorCode = null;
+                else
+                  errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+                errorDesc = e.getMessage();
+                handleIOException(e, "closing file input stream");
+              }
             }
+          } else {
+            // Unrecognized document type
+            activities.noDocument(documentIdentifier,versionString);
+            errorCode = "UNKNOWNTYPE";
+            errorDesc = "Document type is unrecognized: '"+baseTypeId+"'";
           }
+        } finally {
+          if (errorCode != null)
+            activities.recordActivity(new Long(startTime), ACTIVITY_READ,
+              fileLengthLong, documentIdentifier, errorCode, errorDesc, null);
         }
-        else
-          activities.noDocument(documentIdentifier,versionString);
       }
     }