You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by ka...@apache.org on 2017/04/14 20:31:28 UTC
svn commit: r1791409 - in /manifoldcf/trunk: CHANGES.txt
connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java
Author: kamaci
Date: Fri Apr 14 20:31:28 2017
New Revision: 1791409
URL: http://svn.apache.org/viewvc?rev=1791409&view=rev
Log:
Fix for CONNECTORS-1409.
Modified:
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java
Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1791409&r1=1791408&r2=1791409&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Fri Apr 14 20:31:28 2017
@@ -3,10 +3,13 @@ $Id$
======================= 2.7-dev =====================
+CONNECTORS-1409: Fix re-processing email bug.
+(Furkan KAMACI)
+
CONNECTORS-1407: Extract email addresses from email metadata fields.
(Furkan KAMACI)
-CONNECTORS-1406: Fix multiple To and From field bug at e-mail.
+CONNECTORS-1406: Fix multiple To and From field bug at email.
(Furkan KAMACI)
CONNECTORS-1405: Add filename metadata information for email.
Modified: manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java?rev=1791409&r1=1791408&r2=1791409&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java (original)
+++ manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java Fri Apr 14 20:31:28 2017
@@ -24,19 +24,23 @@ import org.apache.manifoldcf.agents.inte
import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.core.util.URLEncoder;
-import org.apache.manifoldcf.crawler.interfaces.*;
+import org.apache.manifoldcf.crawler.interfaces.IExistingVersions;
+import org.apache.manifoldcf.crawler.interfaces.IProcessActivity;
+import org.apache.manifoldcf.crawler.interfaces.ISeedingActivity;
import org.apache.manifoldcf.crawler.system.Logging;
-import java.io.*;
+import javax.mail.*;
+import javax.mail.internet.MimeBodyPart;
+import javax.mail.internet.MimeMessage;
+import javax.mail.search.*;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InterruptedIOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import javax.mail.*;
-import javax.mail.internet.MimeBodyPart;
-import javax.mail.internet.MimeMessage;
-import javax.mail.search.*;
/**
* This interface describes an instance of a connection between a repository and ManifoldCF's
@@ -641,10 +645,7 @@ public class EmailConnector extends org.
String[] encoding = new String[mp.getCount()];
for (int k = 0, n = mp.getCount(); k < n; k++) {
Part part = mp.getBodyPart(k);
- String disposition = part.getDisposition();
- if ((disposition != null) &&
- ((disposition.toLowerCase(Locale.ROOT).equals(Part.ATTACHMENT) ||
- (disposition.toLowerCase(Locale.ROOT).equals(Part.INLINE))))) {
+ if (isAttachment(part)) {
final String[] fileSplit = part.getFileName().split("\\?");
if (fileSplit.length > 1) {
encoding[k] = fileSplit[1];
@@ -666,10 +667,7 @@ public class EmailConnector extends org.
String[] MIMEType = new String[mp.getCount()];
for (int k = 0, n = mp.getCount(); k < n; k++) {
Part part = mp.getBodyPart(k);
- String disposition = part.getDisposition();
- if ((disposition != null) &&
- ((disposition.toLowerCase(Locale.ROOT).equals(Part.ATTACHMENT) ||
- (disposition.toLowerCase(Locale.ROOT).equals(Part.INLINE))))) {
+ if (isAttachment(part)) {
MIMEType[k] = part.getContentType();
}
@@ -687,10 +685,7 @@ public class EmailConnector extends org.
String[] fileNames = new String[mp.getCount()];
for (int k = 0, n = mp.getCount(); k < n; k++) {
Part part = mp.getBodyPart(k);
- String disposition = part.getDisposition();
- if ((disposition != null) &&
- ((disposition.toLowerCase(Locale.ROOT).equals(Part.ATTACHMENT) ||
- (disposition.toLowerCase(Locale.ROOT).equals(Part.INLINE))))) {
+ if (isAttachment(part)) {
fileNames[k] = part.getFileName();
}
}
@@ -718,7 +713,9 @@ public class EmailConnector extends org.
final Multipart mp = (Multipart) msg.getContent();
final int numAttachments = mp.getCount();
for (int i = 0; i < numAttachments; i++) {
- activities.addDocumentReference(documentIdentifier + ":" + i);
+ if (isAttachment(mp.getBodyPart(i))) {
+ activities.addDocumentReference(documentIdentifier + ":" + i);
+ }
}
}
}
@@ -935,6 +932,18 @@ public class EmailConnector extends org.
}
+ /**
+ * Checks whether a Part is an attachment or not
+ * @param part Part to check
+ * @return is attachment or not
+ */
+ private boolean isAttachment(Part part) throws MessagingException {
+ String disposition = part.getDisposition();
+ return ((disposition != null)
+ && ((disposition.toLowerCase(Locale.ROOT).equals(Part.ATTACHMENT)
+ || (disposition.toLowerCase(Locale.ROOT).equals(Part.INLINE)))));
+ }
+
/**
* Extracts e-mail address within < and > characters if any.
* If not, returns passed raw mail address.