You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2017/02/07 19:37:19 UTC

svn commit: r1782061 - in /manifoldcf/trunk: ./ connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/ connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/ connectors/email/c...

Author: kwright
Date: Tue Feb  7 19:37:18 2017
New Revision: 1782061

URL: http://svn.apache.org/viewvc?rev=1782061&view=rev
Log:
Tentative fix for CONNECTORS-1375.

Modified:
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConfig.java
    manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java
    manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_en_US.properties
    manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_es_ES.properties
    manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_ja_JP.properties
    manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_zh_CN.properties
    manifoldcf/trunk/connectors/email/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/email/Configuration_URL.html
    manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Tue Feb  7 19:37:18 2017
@@ -3,6 +3,10 @@ $Id$
 
 ======================= 2.7-dev =====================
 
+CONNECTORS-1375: Add email attachment support, done as separate
+documents.
+(Cihad Guzel, Karl Wright)
+
 CONNECTORS-1371: Fix RSS connector to use same SSL support as
 the web connector.
 (Karl Wright)

Modified: manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConfig.java?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConfig.java (original)
+++ manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConfig.java Tue Feb  7 19:37:18 2017
@@ -53,6 +53,11 @@ public class EmailConfig {
   */
   public static final String URL_PARAM = "url";
   
+  /**
+  * Attachment URL template
+  */
+  public static final String ATTACHMENT_URL_PARAM = "attachmenturl";
+  
   // Protocol options
   
   public static final String PROTOCOL_IMAP = "IMAP";

Modified: manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java (original)
+++ manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java Tue Feb  7 19:37:18 2017
@@ -85,6 +85,7 @@ public class EmailConnector extends org.
   protected String protocol = null;
   protected Properties properties = null;
   protected String urlTemplate = null;
+  protected String attachmentUrlTemplate = null;
   
   // Local session handle
   protected EmailSession session = null;
@@ -115,6 +116,7 @@ public class EmailConnector extends org.
     this.username = configParameters.getParameter(EmailConfig.USERNAME_PARAM);
     this.password = configParameters.getObfuscatedParameter(EmailConfig.PASSWORD_PARAM);
     this.urlTemplate = configParameters.getParameter(EmailConfig.URL_PARAM);
+    this.attachmentUrlTemplate = configParameters.getParameter(EmailConfig.ATTACHMENT_URL_PARAM);
     this.properties = new Properties();
     int i = 0;
     while (i < configParameters.getChildCount()) //In post property set is added as a configuration node
@@ -135,6 +137,7 @@ public class EmailConnector extends org.
   @Override
   public void disconnect()
     throws ManifoldCFException {
+    this.attachmentUrlTemplate = null;
     this.urlTemplate = null;
     this.server = null;
     this.portString = null;
@@ -492,197 +495,341 @@ public class EmailConnector extends org.
     try {
 
       for (String documentIdentifier : documentIdentifiers) {
-        String versionString = "_" + urlTemplate;   // NOT empty; we need to make ManifoldCF understand that this is a document that never will change.
-        
-        // Check if we need to index
-        if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
-          continue;
-        
-        String compositeID = documentIdentifier;
-        String version = versionString;
-        String folderName = extractFolderNameFromDocumentIdentifier(compositeID);
-        String id = extractEmailIDFromDocumentIdentifier(compositeID);
-        
-        String errorCode = null;
-        String errorDesc = null;
-        Long fileLengthLong = null;
-        long startTime = System.currentTimeMillis();
-        try {
+        final int attachmentIndex = documentIdentifier.indexOf("/");
+        if (attachmentIndex == -1) {
+          // It's an email
+          String versionString = "_" + urlTemplate;   // NOT empty; we need to make ManifoldCF understand that this is a document that never will change.
+          
+          // Check if we need to index
+          if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
+            continue;
+          
+          String compositeID = documentIdentifier;
+          String version = versionString;
+          String folderName = extractFolderNameFromDocumentIdentifier(compositeID);
+          String id = extractEmailIDFromDocumentIdentifier(compositeID);
+          
+          String errorCode = null;
+          String errorDesc = null;
+          Long fileLengthLong = null;
+          long startTime = System.currentTimeMillis();
           try {
-            Folder folder = openFolders.get(folderName);
-            if (folder == null)
-            {
-              getSession();
-              OpenFolderThread oft = new OpenFolderThread(session, folderName);
-              oft.start();
-              folder = oft.finishUp();
-              openFolders.put(folderName,folder);
-            }
-            
-            if (Logging.connectors.isDebugEnabled())
-              Logging.connectors.debug("Email: Processing document identifier '"
-                + compositeID + "'");
-            SearchTerm messageIDTerm = new MessageIDTerm(id);
+            try {
+              Folder folder = openFolders.get(folderName);
+              if (folder == null)
+              {
+                getSession();
+                OpenFolderThread oft = new OpenFolderThread(session, folderName);
+                oft.start();
+                folder = oft.finishUp();
+                openFolders.put(folderName,folder);
+              }
               
-            getSession();
-            SearchMessagesThread smt = new SearchMessagesThread(session, folder, messageIDTerm);
-            smt.start();
-            Message[] message = smt.finishUp();
-
-            String msgURL = makeDocumentURI(urlTemplate, folderName, id);
-
-            Message msg = null;
-            for (Message msg2 : message) {
-              msg = msg2;
-            }
-            if (msg == null) {
-              // email was not found
-              activities.deleteDocument(documentIdentifier);
-              continue;
-            }
+              if (Logging.connectors.isDebugEnabled())
+                Logging.connectors.debug("Email: Processing document identifier '"
+                  + compositeID + "'");
+              SearchTerm messageIDTerm = new MessageIDTerm(id);
+                
+              getSession();
+              SearchMessagesThread smt = new SearchMessagesThread(session, folder, messageIDTerm);
+              smt.start();
+              Message[] message = smt.finishUp();
+
+              String msgURL = makeDocumentURI(urlTemplate, folderName, id);
+
+              Message msg = null;
+              for (Message msg2 : message) {
+                msg = msg2;
+              }
+              if (msg == null) {
+                // email was not found
+                activities.deleteDocument(documentIdentifier);
+                continue;
+              }
+                
+              if (!activities.checkURLIndexable(msgURL)) {
+                errorCode = activities.EXCLUDED_URL;
+                errorDesc = "Excluded because of URL ('"+msgURL+"')";
+                activities.noDocument(documentIdentifier, version);
+                continue;
+              }
+                
+              long fileLength = msg.getSize();
+              if (!activities.checkLengthIndexable(fileLength)) {
+                errorCode = activities.EXCLUDED_LENGTH;
+                errorDesc = "Excluded because of length ("+fileLength+")";
+                activities.noDocument(documentIdentifier, version);
+                continue;
+              }
+                
+              Date sentDate = msg.getSentDate();
+              if (!activities.checkDateIndexable(sentDate)) {
+                errorCode = activities.EXCLUDED_DATE;
+                errorDesc = "Excluded because of date ("+sentDate+")";
+                activities.noDocument(documentIdentifier, version);
+                continue;
+              }
               
-            if (!activities.checkURLIndexable(msgURL)) {
-              errorCode = activities.EXCLUDED_URL;
-              errorDesc = "Excluded because of URL ('"+msgURL+"')";
-              activities.noDocument(documentIdentifier, version);
-              continue;
-            }
+              String mimeType = "text/plain";
+              if (!activities.checkMimeTypeIndexable(mimeType)) {
+                errorCode = activities.EXCLUDED_MIMETYPE;
+                errorDesc = "Excluded because of mime type ('"+mimeType+"')";
+                activities.noDocument(documentIdentifier, version);
+                continue;
+              }
               
-            long fileLength = msg.getSize();
-            if (!activities.checkLengthIndexable(fileLength)) {
-              errorCode = activities.EXCLUDED_LENGTH;
-              errorDesc = "Excluded because of length ("+fileLength+")";
-              activities.noDocument(documentIdentifier, version);
-              continue;
-            }
+              RepositoryDocument rd = new RepositoryDocument();
+              rd.setFileName(msg.getFileName());
+              rd.setMimeType(mimeType);
+              rd.setCreatedDate(sentDate);
+              rd.setModifiedDate(sentDate);
               
-            Date sentDate = msg.getSentDate();
-            if (!activities.checkDateIndexable(sentDate)) {
-              errorCode = activities.EXCLUDED_DATE;
-              errorDesc = "Excluded because of date ("+sentDate+")";
-              activities.noDocument(documentIdentifier, version);
-              continue;
-            }
-            
-            String mimeType = "text/plain";
-            if (!activities.checkMimeTypeIndexable(mimeType)) {
-              errorCode = activities.EXCLUDED_MIMETYPE;
-              errorDesc = "Excluded because of mime type ('"+mimeType+"')";
-              activities.noDocument(documentIdentifier, version);
-              continue;
-            }
-            
-            RepositoryDocument rd = new RepositoryDocument();
-            rd.setFileName(msg.getFileName());
-            rd.setMimeType(mimeType);
-            rd.setCreatedDate(sentDate);
-            rd.setModifiedDate(sentDate);
-            
-            String subject = StringUtils.EMPTY;
-            for (String metadata : requiredMetadata) {
-              if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_TO)) {
-                Address[] to = msg.getRecipients(Message.RecipientType.TO);
-                String[] toStr = new String[to.length];
-                int j = 0;
-                for (Address address : to) {
-                  toStr[j] = address.toString();
-                }
-                rd.addField(EmailConfig.EMAIL_TO, toStr);
-              } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_FROM)) {
-                Address[] from = msg.getFrom();
-                String[] fromStr = new String[from.length];
-                int j = 0;
-                for (Address address : from) {
-                  fromStr[j] = address.toString();
-                }
-                rd.addField(EmailConfig.EMAIL_FROM, fromStr);
+              String subject = StringUtils.EMPTY;
+              for (String metadata : requiredMetadata) {
+                if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_TO)) {
+                  Address[] to = msg.getRecipients(Message.RecipientType.TO);
+                  String[] toStr = new String[to.length];
+                  int j = 0;
+                  for (Address address : to) {
+                    toStr[j] = address.toString();
+                  }
+                  rd.addField(EmailConfig.EMAIL_TO, toStr);
+                } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_FROM)) {
+                  Address[] from = msg.getFrom();
+                  String[] fromStr = new String[from.length];
+                  int j = 0;
+                  for (Address address : from) {
+                    fromStr[j] = address.toString();
+                  }
+                  rd.addField(EmailConfig.EMAIL_FROM, fromStr);
 
-              } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_SUBJECT)) {
-                subject = msg.getSubject();
-                rd.addField(EmailConfig.EMAIL_SUBJECT, subject);
-              } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_BODY)) {
-                Object o = msg.getContent();
-                if (o instanceof Multipart) {
+                } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_SUBJECT)) {
+                  subject = msg.getSubject();
+                  rd.addField(EmailConfig.EMAIL_SUBJECT, subject);
+                } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_BODY)) {
+                  Object o = msg.getContent();
+                  if (o instanceof Multipart) {
+                    Multipart mp = (Multipart) msg.getContent();
+                    for (int k = 0, n = mp.getCount(); k < n; k++) {
+                      Part part = mp.getBodyPart(k);
+                      String disposition = part.getDisposition();
+                      if ((disposition == null)) {
+                        MimeBodyPart mbp = (MimeBodyPart) part;
+                        if (mbp.isMimeType(EmailConfig.MIMETYPE_TEXT_PLAIN)) {
+                          rd.addField(EmailConfig.EMAIL_BODY, mbp.getContent().toString());
+                        } else if (mbp.isMimeType(EmailConfig.MIMETYPE_HTML)) {
+                          rd.addField(EmailConfig.EMAIL_BODY, mbp.getContent().toString()); //handle html accordingly. Returns content with html tags
+                        }
+                      }
+                    }
+                  } else if (o instanceof String) {
+                    rd.addField(EmailConfig.EMAIL_BODY, (String)o);
+                  }
+                } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_DATE)) {
+                  rd.addField(EmailConfig.EMAIL_DATE, sentDate.toString());
+                } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_ATTACHMENT_ENCODING)) {
                   Multipart mp = (Multipart) msg.getContent();
-                  for (int k = 0, n = mp.getCount(); k < n; k++) {
-                    Part part = mp.getBodyPart(k);
-                    String disposition = part.getDisposition();
-                    if ((disposition == null)) {
-                      MimeBodyPart mbp = (MimeBodyPart) part;
-                      if (mbp.isMimeType(EmailConfig.MIMETYPE_TEXT_PLAIN)) {
-                        rd.addField(EmailConfig.EMAIL_BODY, mbp.getContent().toString());
-                      } else if (mbp.isMimeType(EmailConfig.MIMETYPE_HTML)) {
-                        rd.addField(EmailConfig.EMAIL_BODY, mbp.getContent().toString()); //handle html accordingly. Returns content with html tags
+                  if (mp != null) {
+                    String[] encoding = new String[mp.getCount()];
+                    for (int k = 0, n = mp.getCount(); k < n; k++) {
+                      Part part = mp.getBodyPart(k);
+                      String disposition = part.getDisposition();
+                      if ((disposition != null) &&
+                          ((disposition.equals(Part.ATTACHMENT) ||
+                              (disposition.equals(Part.INLINE))))) {
+                        encoding[k] = part.getFileName().split("\\?")[1];
+
                       }
                     }
+                    rd.addField(EmailConfig.ENCODING_FIELD, encoding);
                   }
-                } else if (o instanceof String) {
-                  rd.addField(EmailConfig.EMAIL_BODY, (String)o);
-                }
-              } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_DATE)) {
-                rd.addField(EmailConfig.EMAIL_DATE, sentDate.toString());
-              } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_ATTACHMENT_ENCODING)) {
-                Multipart mp = (Multipart) msg.getContent();
-                if (mp != null) {
-                  String[] encoding = new String[mp.getCount()];
+                } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_ATTACHMENT_MIMETYPE)) {
+                  Multipart mp = (Multipart) msg.getContent();
+                  String[] MIMEType = new String[mp.getCount()];
                   for (int k = 0, n = mp.getCount(); k < n; k++) {
                     Part part = mp.getBodyPart(k);
                     String disposition = part.getDisposition();
                     if ((disposition != null) &&
                         ((disposition.equals(Part.ATTACHMENT) ||
                             (disposition.equals(Part.INLINE))))) {
-                      encoding[k] = part.getFileName().split("\\?")[1];
+                      MIMEType[k] = part.getContentType();
 
                     }
                   }
-                  rd.addField(EmailConfig.ENCODING_FIELD, encoding);
+                  rd.addField(EmailConfig.MIMETYPE_FIELD, MIMEType);
                 }
-              } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_ATTACHMENT_MIMETYPE)) {
-                Multipart mp = (Multipart) msg.getContent();
-                String[] MIMEType = new String[mp.getCount()];
-                for (int k = 0, n = mp.getCount(); k < n; k++) {
-                  Part part = mp.getBodyPart(k);
-                  String disposition = part.getDisposition();
-                  if ((disposition != null) &&
-                      ((disposition.equals(Part.ATTACHMENT) ||
-                          (disposition.equals(Part.INLINE))))) {
-                    MIMEType[k] = part.getContentType();
-
-                  }
+              }
+                  
+              InputStream is = msg.getInputStream();
+              try {
+                rd.setBinary(is, fileLength);
+                activities.ingestDocumentWithException(documentIdentifier, version, msgURL, rd);
+                errorCode = "OK";
+                fileLengthLong = new Long(fileLength);
+              } finally {
+                is.close();
+              }
+              
+              // If we're supposed to deal with attachments, this is the time to queue them up
+              if (attachmentUrlTemplate != null) {
+                final Multipart mp = (Multipart) msg.getContent();
+                final int numAttachments = mp.getCount();
+                for (int i = 0; i < numAttachments; i++) {
+                  activities.addDocumentReference(documentIdentifier + "/" + i);
                 }
-                rd.addField(EmailConfig.MIMETYPE_FIELD, MIMEType);
               }
+              
+            } catch (InterruptedException e) {
+              throw new ManifoldCFException(e.getMessage(), ManifoldCFException.INTERRUPTED);
+            } catch (MessagingException e) {
+              errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+              errorDesc = e.getMessage();
+              handleMessagingException(e, "processing email");
+            } catch (IOException e) {
+              errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+              errorDesc = e.getMessage();
+              handleIOException(e, "processing email");
+              throw new ManifoldCFException(e.getMessage(), e);
             }
-                
-            InputStream is = msg.getInputStream();
+          } catch (ManifoldCFException e) {
+            if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+              errorCode = null;
+            throw e;
+          } finally {
+            if (errorCode != null)
+              activities.recordActivity(new Long(startTime),EmailConfig.ACTIVITY_FETCH,
+                fileLengthLong,documentIdentifier,errorCode,errorDesc,null);
+          }
+        } else {
+          // It's a specific attachment
+          final int attachmentNumber = Integer.parseInt(documentIdentifier.substring(attachmentIndex + 1));
+          final String origDocumentIdentifier = documentIdentifier.substring(0, attachmentIndex);
+
+          String versionString = "_" + attachmentUrlTemplate;   // NOT empty; we need to make ManifoldCF understand that this is a document that never will change.
+          
+          // Check if we need to index
+          if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
+            continue;
+          
+          String compositeID = origDocumentIdentifier;
+          String version = versionString;
+          String folderName = extractFolderNameFromDocumentIdentifier(compositeID);
+          String id = extractEmailIDFromDocumentIdentifier(compositeID);
+          
+          String errorCode = null;
+          String errorDesc = null;
+          Long fileLengthLong = null;
+          long startTime = System.currentTimeMillis();
+          try {
             try {
-              rd.setBinary(is, fileLength);
-              activities.ingestDocumentWithException(documentIdentifier, version, msgURL, rd);
-              errorCode = "OK";
-              fileLengthLong = new Long(fileLength);
-            } finally {
-              is.close();
+              Folder folder = openFolders.get(folderName);
+              if (folder == null)
+              {
+                getSession();
+                OpenFolderThread oft = new OpenFolderThread(session, folderName);
+                oft.start();
+                folder = oft.finishUp();
+                openFolders.put(folderName,folder);
+              }
+              
+              if (Logging.connectors.isDebugEnabled())
+                Logging.connectors.debug("Email: Processing document identifier '"
+                  + documentIdentifier + "'");
+              SearchTerm messageIDTerm = new MessageIDTerm(id);
+                
+              getSession();
+              SearchMessagesThread smt = new SearchMessagesThread(session, folder, messageIDTerm);
+              smt.start();
+              Message[] message = smt.finishUp();
+
+              String msgURL = makeDocumentURI(attachmentUrlTemplate, folderName, id, attachmentNumber);
+
+              Message msg = null;
+              for (Message msg2 : message) {
+                msg = msg2;
+              }
+              if (msg == null) {
+                // email was not found
+                activities.deleteDocument(documentIdentifier);
+                continue;
+              }
+                
+              if (!activities.checkURLIndexable(msgURL)) {
+                errorCode = activities.EXCLUDED_URL;
+                errorDesc = "Excluded because of URL ('"+msgURL+"')";
+                activities.noDocument(documentIdentifier, version);
+                continue;
+              }
+
+              final Date sentDate = msg.getSentDate();
+              if (!activities.checkDateIndexable(sentDate)) {
+                errorCode = activities.EXCLUDED_DATE;
+                errorDesc = "Excluded because of date ("+sentDate+")";
+                activities.noDocument(documentIdentifier, version);
+                continue;
+              }
+
+              final Multipart mp = (Multipart) msg.getContent();
+              if (mp.getCount() >= attachmentNumber) {
+                activities.deleteDocument(documentIdentifier);
+                continue;
+              }
+              final Part part = mp.getBodyPart(attachmentNumber);
+                            
+              final long fileLength = part.getSize();
+              if (!activities.checkLengthIndexable(fileLength)) {
+                errorCode = activities.EXCLUDED_LENGTH;
+                errorDesc = "Excluded because of length ("+fileLength+")";
+                activities.noDocument(documentIdentifier, version);
+                continue;
+              }
+                
+              final String mimeType = part.getContentType();
+              if (!activities.checkMimeTypeIndexable(mimeType)) {
+                errorCode = activities.EXCLUDED_MIMETYPE;
+                errorDesc = "Excluded because of mime type ('"+mimeType+"')";
+                activities.noDocument(documentIdentifier, version);
+                continue;
+              }
+
+              RepositoryDocument rd = new RepositoryDocument();
+              rd.setFileName(part.getFileName());
+              rd.setMimeType(mimeType);
+              rd.setCreatedDate(sentDate);
+              rd.setModifiedDate(sentDate);
+
+              final InputStream is = part.getInputStream();
+              try {
+                rd.setBinary(is, fileLength);
+                activities.ingestDocumentWithException(documentIdentifier, version, msgURL, rd);
+                errorCode = "OK";
+                fileLengthLong = new Long(fileLength);
+              } finally {
+                is.close();
+              }
+
+            } catch (InterruptedException e) {
+              throw new ManifoldCFException(e.getMessage(), ManifoldCFException.INTERRUPTED);
+            } catch (MessagingException e) {
+              errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+              errorDesc = e.getMessage();
+              handleMessagingException(e, "processing email attachment");
+            } catch (IOException e) {
+              errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+              errorDesc = e.getMessage();
+              handleIOException(e, "processing email attachment");
+              throw new ManifoldCFException(e.getMessage(), e);
             }
-          } catch (InterruptedException e) {
-            throw new ManifoldCFException(e.getMessage(), ManifoldCFException.INTERRUPTED);
-          } catch (MessagingException e) {
-            errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
-            errorDesc = e.getMessage();
-            handleMessagingException(e, "processing email");
-          } catch (IOException e) {
-            errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
-            errorDesc = e.getMessage();
-            handleIOException(e, "processing email");
-            throw new ManifoldCFException(e.getMessage(), e);
+          } catch (ManifoldCFException e) {
+            if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+              errorCode = null;
+            throw e;
+          } finally {
+            if (errorCode != null)
+              activities.recordActivity(new Long(startTime),EmailConfig.ACTIVITY_FETCH,
+                fileLengthLong,documentIdentifier,errorCode,errorDesc,null);
           }
-        } catch (ManifoldCFException e) {
-          if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
-            errorCode = null;
-          throw e;
-        } finally {
-          if (errorCode != null)
-            activities.recordActivity(new Long(startTime),EmailConfig.ACTIVITY_FETCH,
-              fileLengthLong,documentIdentifier,errorCode,errorDesc,null);
+
         }
       }
     }
@@ -805,10 +952,19 @@ public class EmailConnector extends org.
   private static void fillInURLConfigurationMap(Map<String, Object> paramMap, IPasswordMapperActivity mapper, ConfigParams parameters) {
     String urlTemplate = parameters.getParameter(EmailConfig.URL_PARAM);
 
-    if (urlTemplate == null)
+    if (urlTemplate == null) {
       urlTemplate = "http://sampleserver/$(FOLDERNAME)?id=$(MESSAGEID)";
+    }
 
     paramMap.put("URL", urlTemplate);
+    
+    String attachmentUrlTemplate = parameters.getParameter(EmailConfig.ATTACHMENT_URL_PARAM);
+    
+    if (attachmentUrlTemplate == null) {
+      attachmentUrlTemplate = "http://sampleserver/$(FOLDERNAME)?id=$(MESSAGEID)&attach=$(ATTACHMENTNUMBER)";
+    }
+    
+    paramMap.put("ATTACHMENTURL", attachmentUrlTemplate);
   }
 
   /**
@@ -833,6 +989,10 @@ public class EmailConnector extends org.
     if (urlTemplate != null)
       parameters.setParameter(EmailConfig.URL_PARAM, urlTemplate);
 
+    String attachmentUrlTemplate = variableContext.getParameter("attachmenturl");
+    if (attachmentUrlTemplate != null)
+      parameters.setParameter(EmailConfig.ATTACHMENT_URL_PARAM, attachmentUrlTemplate);
+
     String userName = variableContext.getParameter("username");
     if (userName != null)
       parameters.setParameter(EmailConfig.USERNAME_PARAM, userName);
@@ -1210,6 +1370,20 @@ public class EmailConnector extends org.
       return substitute(urlTemplate, subsMap);
   }
 
+  /** Create an attachment's URI from a template by supplying FOLDERNAME, MESSAGEID and ATTACHMENTNUMBER substitutions; the folder name and message ID are URL-encoded first, the attachment number is inserted as plain decimal text. */
+  protected static String makeDocumentURI(String urlTemplate, String folderName, String id, int attachmentNumber)
+  {
+      // URL encode folder name and id. NOTE(review): the one-argument URLEncoder.encode is deprecated and uses the platform default encoding.
+      String encodedFolderName = URLEncoder.encode(folderName);
+      String encodedId = URLEncoder.encode(id);
+      // The template is already URL encoded, except for the substitution points; the map below is keyed by substitution-point name.
+      Map<String,String> subsMap = new HashMap<String,String>();
+      subsMap.put("FOLDERNAME", encodedFolderName);
+      subsMap.put("MESSAGEID", encodedId);
+      subsMap.put("ATTACHMENTNUMBER", Integer.toString(attachmentNumber));
+      return substitute(urlTemplate, subsMap);
+  }
+
   protected static String substitute(String template, Map<String,String> map)
   {
     StringBuilder sb = new StringBuilder();

Modified: manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_en_US.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_en_US.properties?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_en_US.properties (original)
+++ manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_en_US.properties Tue Feb  7 19:37:18 2017
@@ -47,8 +47,7 @@ EmailConnector.MetadataName=Metadata nam
 EmailConnector.NoMetadataSpecified=No metadata specified
 EmailConnector.SelectMetadataName=--Select metadata name --
 EmailConnector.IncludedMetadataColon=Included metadata:
-
-
+EmailConnector.AttachmentURLTemplateColon=Attachment URL template (blank if no attachments desired):
 
 
 

Modified: manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_es_ES.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_es_ES.properties?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_es_ES.properties (original)
+++ manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_es_ES.properties Tue Feb  7 19:37:18 2017
@@ -19,27 +19,27 @@ EmailConnector.Metadata=metadatos
 EmailConnector.Filter=Filtro
 
 EmailConnector.EnterAMailServerHostName=Introduzca un nombre de host del servidor de correo
-EmailConnector.PleaseSelectAConfigurationParameterName=Por favor seleccione un nombre de par�metro de configuraci�n
+EmailConnector.PleaseSelectAConfigurationParameterName=Por favor seleccione un nombre de parámetro de configuración
 EmailConnector.PleaseSelectAMetadataName=Por favor seleccione un nombre de metadatos
 EmailConnector.ValueCannotBeBlank=El valor no puede estar en blanco
 EmailConnector.URLTemplateCannotBeBlank=URL plantilla no puede estar en blanco
 
 EmailConnector.URLTemplateColon=URL plantilla:
-EmailConnector.ConfigurationPropertiesColon=propiedades de configuraci�n:
+EmailConnector.ConfigurationPropertiesColon=propiedades de configuración:
 EmailConnector.ProtocolColon=Protocolo:
 EmailConnector.HostNameColon=nombre de host:
 EmailConnector.PortColon=Puerto:
 EmailConnector.UserNameColon=Nombre de usuario:
-EmailConnector.PasswordColon=Contrase�a:
+EmailConnector.PasswordColon=Contraseña:
 EmailConnector.MatchesColon=Correspondecia:
 EmailConnector.FoldersColon=Carpetas:
 EmailConnector.RecordFilterColon=filtro de registros:
 EmailConnector.ServerProperty=propiedad del servidor
 EmailConnector.Value=Valor
 EmailConnector.NoServerPropertiesSpecified=No hay propiedades de servidor especificados
-EmailConnector.AddNewMatch=A�adir una nueva correspondecia
-EmailConnector.AddNewProperty=A�adir nueva propiedad
-EmailConnector.Add=a�adir
+EmailConnector.AddNewMatch=Añadir una nueva correspondecia
+EmailConnector.AddNewProperty=Añadir nueva propiedad
+EmailConnector.Add=añadir
 EmailConnector.DeleteMatchNumber=eliminar correspondencia #
 EmailConnector.DeletePropertyNumber=eliminar propiedad #
 EmailConnector.Delete=borrar
@@ -47,6 +47,7 @@ EmailConnector.MetadataName=nombre de me
 EmailConnector.NoMetadataSpecified=Sin metadatos especificada
 EmailConnector.SelectMetadataName=--Seleccione el nombre de metadatos --
 EmailConnector.IncludedMetadataColon=metadatos Incluido:
+EmailConnector.AttachmentURLTemplateColon=Attachment URL template (blank if no attachments desired):
 
 
 

Modified: manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_ja_JP.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_ja_JP.properties?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_ja_JP.properties (original)
+++ manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_ja_JP.properties Tue Feb  7 19:37:18 2017
@@ -47,6 +47,7 @@ EmailConnector.MetadataName=Metadata nam
 EmailConnector.NoMetadataSpecified=No metadata specified
 EmailConnector.SelectMetadataName=--Select metadata name --
 EmailConnector.IncludedMetadataColon=Included metadata:
+EmailConnector.AttachmentURLTemplateColon=Attachment URL template (blank if no attachments desired):
 
 
 

Modified: manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_zh_CN.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_zh_CN.properties?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_zh_CN.properties (original)
+++ manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_zh_CN.properties Tue Feb  7 19:37:18 2017
@@ -47,6 +47,7 @@ EmailConnector.MetadataName=元数
 EmailConnector.NoMetadataSpecified=元数据未指定
 EmailConnector.SelectMetadataName=-- 选择元数据名 --
 EmailConnector.IncludedMetadataColon=被包含的元数据:
+EmailConnector.AttachmentURLTemplateColon=Attachment URL template (blank if no attachments desired):
 
 
 

Modified: manifoldcf/trunk/connectors/email/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/email/Configuration_URL.html
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/email/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/email/Configuration_URL.html?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/email/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/email/Configuration_URL.html (original)
+++ manifoldcf/trunk/connectors/email/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/email/Configuration_URL.html Tue Feb  7 19:37:18 2017
@@ -27,6 +27,14 @@ limitations under the License.
       <nobr><input type="text" name="url" size="60" value="$Encoder.attributeEscape($URL)"/></nobr>
     </td>
   </tr>
+  <tr>
+    <td class="description">
+      <nobr>$Encoder.bodyEscape($ResourceBundle.getString('EmailConnector.AttachmentURLTemplateColon'))</nobr>
+    </td>
+    <td class="value">
+      <nobr><input type="text" name="attachmenturl" size="60" value="$Encoder.attributeEscape($ATTACHMENTURL)"/></nobr>
+    </td>
+  </tr>
 </table>
 
 #else

Modified: manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java (original)
+++ manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java Tue Feb  7 19:37:18 2017
@@ -1100,7 +1100,8 @@ public class HttpPoster
       throws IOException
     {
       ModifiableSolrParams out = new ModifiableSolrParams();
-          
+      Logging.ingest.debug("Solr: Writing document '"+documentURI);
+      
       // Write the id field
       writeField(out,LITERAL+idAttributeName,documentURI);
       // Write the rest of the attributes
@@ -1179,6 +1180,8 @@ public class HttpPoster
       contentStreamUpdateRequest.setParams(out);
           
       contentStreamUpdateRequest.addContentStream(new RepositoryDocumentStream(is,length,contentType,contentName));
+      
+      Logging.ingest.debug("Solr: Done writing '"+documentURI+"'");
     }
 
     /**
@@ -1196,6 +1199,7 @@ public class HttpPoster
       {
         String originalFieldName = iter.next();
         String fieldName = makeSafeLuceneField(originalFieldName);
+        Logging.ingest.debug("Solr: Saw field '"+originalFieldName+"'; converted to '"+fieldName+"'");
         applySingleMapping(originalFieldName, out, fieldName);
       }
     }