You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2017/02/07 19:37:19 UTC
svn commit: r1782061 - in /manifoldcf/trunk: ./
connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/
connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/
connectors/email/c...
Author: kwright
Date: Tue Feb 7 19:37:18 2017
New Revision: 1782061
URL: http://svn.apache.org/viewvc?rev=1782061&view=rev
Log:
Tentative fix for CONNECTORS-1375.
Modified:
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConfig.java
manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java
manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_en_US.properties
manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_es_ES.properties
manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_ja_JP.properties
manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_zh_CN.properties
manifoldcf/trunk/connectors/email/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/email/Configuration_URL.html
manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Tue Feb 7 19:37:18 2017
@@ -3,6 +3,10 @@ $Id$
======================= 2.7-dev =====================
+CONNECTORS-1375: Add email attachment support, done as separate
+documents.
+(Cihad Guzel, Karl Wright)
+
CONNECTORS-1371: Fix RSS connector to use same SSL support as
the web connector.
(Karl Wright)
Modified: manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConfig.java?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConfig.java (original)
+++ manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConfig.java Tue Feb 7 19:37:18 2017
@@ -53,6 +53,11 @@ public class EmailConfig {
*/
public static final String URL_PARAM = "url";
+ /**
+ * Attachment URL template
+ */
+ public static final String ATTACHMENT_URL_PARAM = "attachmenturl";
+
// Protocol options
public static final String PROTOCOL_IMAP = "IMAP";
Modified: manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java (original)
+++ manifoldcf/trunk/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java Tue Feb 7 19:37:18 2017
@@ -85,6 +85,7 @@ public class EmailConnector extends org.
protected String protocol = null;
protected Properties properties = null;
protected String urlTemplate = null;
+ protected String attachmentUrlTemplate = null;
// Local session handle
protected EmailSession session = null;
@@ -115,6 +116,7 @@ public class EmailConnector extends org.
this.username = configParameters.getParameter(EmailConfig.USERNAME_PARAM);
this.password = configParameters.getObfuscatedParameter(EmailConfig.PASSWORD_PARAM);
this.urlTemplate = configParameters.getParameter(EmailConfig.URL_PARAM);
+ this.attachmentUrlTemplate = configParameters.getParameter(EmailConfig.ATTACHMENT_URL_PARAM);
this.properties = new Properties();
int i = 0;
while (i < configParameters.getChildCount()) //In post property set is added as a configuration node
@@ -135,6 +137,7 @@ public class EmailConnector extends org.
@Override
public void disconnect()
throws ManifoldCFException {
+ this.attachmentUrlTemplate = null;
this.urlTemplate = null;
this.server = null;
this.portString = null;
@@ -492,197 +495,341 @@ public class EmailConnector extends org.
try {
for (String documentIdentifier : documentIdentifiers) {
- String versionString = "_" + urlTemplate; // NOT empty; we need to make ManifoldCF understand that this is a document that never will change.
-
- // Check if we need to index
- if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
- continue;
-
- String compositeID = documentIdentifier;
- String version = versionString;
- String folderName = extractFolderNameFromDocumentIdentifier(compositeID);
- String id = extractEmailIDFromDocumentIdentifier(compositeID);
-
- String errorCode = null;
- String errorDesc = null;
- Long fileLengthLong = null;
- long startTime = System.currentTimeMillis();
- try {
+ final int attachmentIndex = documentIdentifier.indexOf("/");
+ if (attachmentIndex == -1) {
+ // It's an email
+ String versionString = "_" + urlTemplate; // NOT empty; we need to make ManifoldCF understand that this is a document that never will change.
+
+ // Check if we need to index
+ if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
+ continue;
+
+ String compositeID = documentIdentifier;
+ String version = versionString;
+ String folderName = extractFolderNameFromDocumentIdentifier(compositeID);
+ String id = extractEmailIDFromDocumentIdentifier(compositeID);
+
+ String errorCode = null;
+ String errorDesc = null;
+ Long fileLengthLong = null;
+ long startTime = System.currentTimeMillis();
try {
- Folder folder = openFolders.get(folderName);
- if (folder == null)
- {
- getSession();
- OpenFolderThread oft = new OpenFolderThread(session, folderName);
- oft.start();
- folder = oft.finishUp();
- openFolders.put(folderName,folder);
- }
-
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("Email: Processing document identifier '"
- + compositeID + "'");
- SearchTerm messageIDTerm = new MessageIDTerm(id);
+ try {
+ Folder folder = openFolders.get(folderName);
+ if (folder == null)
+ {
+ getSession();
+ OpenFolderThread oft = new OpenFolderThread(session, folderName);
+ oft.start();
+ folder = oft.finishUp();
+ openFolders.put(folderName,folder);
+ }
- getSession();
- SearchMessagesThread smt = new SearchMessagesThread(session, folder, messageIDTerm);
- smt.start();
- Message[] message = smt.finishUp();
-
- String msgURL = makeDocumentURI(urlTemplate, folderName, id);
-
- Message msg = null;
- for (Message msg2 : message) {
- msg = msg2;
- }
- if (msg == null) {
- // email was not found
- activities.deleteDocument(documentIdentifier);
- continue;
- }
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("Email: Processing document identifier '"
+ + compositeID + "'");
+ SearchTerm messageIDTerm = new MessageIDTerm(id);
+
+ getSession();
+ SearchMessagesThread smt = new SearchMessagesThread(session, folder, messageIDTerm);
+ smt.start();
+ Message[] message = smt.finishUp();
+
+ String msgURL = makeDocumentURI(urlTemplate, folderName, id);
+
+ Message msg = null;
+ for (Message msg2 : message) {
+ msg = msg2;
+ }
+ if (msg == null) {
+ // email was not found
+ activities.deleteDocument(documentIdentifier);
+ continue;
+ }
+
+ if (!activities.checkURLIndexable(msgURL)) {
+ errorCode = activities.EXCLUDED_URL;
+ errorDesc = "Excluded because of URL ('"+msgURL+"')";
+ activities.noDocument(documentIdentifier, version);
+ continue;
+ }
+
+ long fileLength = msg.getSize();
+ if (!activities.checkLengthIndexable(fileLength)) {
+ errorCode = activities.EXCLUDED_LENGTH;
+ errorDesc = "Excluded because of length ("+fileLength+")";
+ activities.noDocument(documentIdentifier, version);
+ continue;
+ }
+
+ Date sentDate = msg.getSentDate();
+ if (!activities.checkDateIndexable(sentDate)) {
+ errorCode = activities.EXCLUDED_DATE;
+ errorDesc = "Excluded because of date ("+sentDate+")";
+ activities.noDocument(documentIdentifier, version);
+ continue;
+ }
- if (!activities.checkURLIndexable(msgURL)) {
- errorCode = activities.EXCLUDED_URL;
- errorDesc = "Excluded because of URL ('"+msgURL+"')";
- activities.noDocument(documentIdentifier, version);
- continue;
- }
+ String mimeType = "text/plain";
+ if (!activities.checkMimeTypeIndexable(mimeType)) {
+ errorCode = activities.EXCLUDED_MIMETYPE;
+ errorDesc = "Excluded because of mime type ('"+mimeType+"')";
+ activities.noDocument(documentIdentifier, version);
+ continue;
+ }
- long fileLength = msg.getSize();
- if (!activities.checkLengthIndexable(fileLength)) {
- errorCode = activities.EXCLUDED_LENGTH;
- errorDesc = "Excluded because of length ("+fileLength+")";
- activities.noDocument(documentIdentifier, version);
- continue;
- }
+ RepositoryDocument rd = new RepositoryDocument();
+ rd.setFileName(msg.getFileName());
+ rd.setMimeType(mimeType);
+ rd.setCreatedDate(sentDate);
+ rd.setModifiedDate(sentDate);
- Date sentDate = msg.getSentDate();
- if (!activities.checkDateIndexable(sentDate)) {
- errorCode = activities.EXCLUDED_DATE;
- errorDesc = "Excluded because of date ("+sentDate+")";
- activities.noDocument(documentIdentifier, version);
- continue;
- }
-
- String mimeType = "text/plain";
- if (!activities.checkMimeTypeIndexable(mimeType)) {
- errorCode = activities.EXCLUDED_MIMETYPE;
- errorDesc = "Excluded because of mime type ('"+mimeType+"')";
- activities.noDocument(documentIdentifier, version);
- continue;
- }
-
- RepositoryDocument rd = new RepositoryDocument();
- rd.setFileName(msg.getFileName());
- rd.setMimeType(mimeType);
- rd.setCreatedDate(sentDate);
- rd.setModifiedDate(sentDate);
-
- String subject = StringUtils.EMPTY;
- for (String metadata : requiredMetadata) {
- if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_TO)) {
- Address[] to = msg.getRecipients(Message.RecipientType.TO);
- String[] toStr = new String[to.length];
- int j = 0;
- for (Address address : to) {
- toStr[j] = address.toString();
- }
- rd.addField(EmailConfig.EMAIL_TO, toStr);
- } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_FROM)) {
- Address[] from = msg.getFrom();
- String[] fromStr = new String[from.length];
- int j = 0;
- for (Address address : from) {
- fromStr[j] = address.toString();
- }
- rd.addField(EmailConfig.EMAIL_FROM, fromStr);
+ String subject = StringUtils.EMPTY;
+ for (String metadata : requiredMetadata) {
+ if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_TO)) {
+ Address[] to = msg.getRecipients(Message.RecipientType.TO);
+ String[] toStr = new String[to.length];
+ int j = 0;
+ for (Address address : to) {
+ toStr[j] = address.toString();
+ }
+ rd.addField(EmailConfig.EMAIL_TO, toStr);
+ } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_FROM)) {
+ Address[] from = msg.getFrom();
+ String[] fromStr = new String[from.length];
+ int j = 0;
+ for (Address address : from) {
+ fromStr[j] = address.toString();
+ }
+ rd.addField(EmailConfig.EMAIL_FROM, fromStr);
- } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_SUBJECT)) {
- subject = msg.getSubject();
- rd.addField(EmailConfig.EMAIL_SUBJECT, subject);
- } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_BODY)) {
- Object o = msg.getContent();
- if (o instanceof Multipart) {
+ } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_SUBJECT)) {
+ subject = msg.getSubject();
+ rd.addField(EmailConfig.EMAIL_SUBJECT, subject);
+ } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_BODY)) {
+ Object o = msg.getContent();
+ if (o instanceof Multipart) {
+ Multipart mp = (Multipart) msg.getContent();
+ for (int k = 0, n = mp.getCount(); k < n; k++) {
+ Part part = mp.getBodyPart(k);
+ String disposition = part.getDisposition();
+ if ((disposition == null)) {
+ MimeBodyPart mbp = (MimeBodyPart) part;
+ if (mbp.isMimeType(EmailConfig.MIMETYPE_TEXT_PLAIN)) {
+ rd.addField(EmailConfig.EMAIL_BODY, mbp.getContent().toString());
+ } else if (mbp.isMimeType(EmailConfig.MIMETYPE_HTML)) {
+ rd.addField(EmailConfig.EMAIL_BODY, mbp.getContent().toString()); //handle html accordingly. Returns content with html tags
+ }
+ }
+ }
+ } else if (o instanceof String) {
+ rd.addField(EmailConfig.EMAIL_BODY, (String)o);
+ }
+ } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_DATE)) {
+ rd.addField(EmailConfig.EMAIL_DATE, sentDate.toString());
+ } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_ATTACHMENT_ENCODING)) {
Multipart mp = (Multipart) msg.getContent();
- for (int k = 0, n = mp.getCount(); k < n; k++) {
- Part part = mp.getBodyPart(k);
- String disposition = part.getDisposition();
- if ((disposition == null)) {
- MimeBodyPart mbp = (MimeBodyPart) part;
- if (mbp.isMimeType(EmailConfig.MIMETYPE_TEXT_PLAIN)) {
- rd.addField(EmailConfig.EMAIL_BODY, mbp.getContent().toString());
- } else if (mbp.isMimeType(EmailConfig.MIMETYPE_HTML)) {
- rd.addField(EmailConfig.EMAIL_BODY, mbp.getContent().toString()); //handle html accordingly. Returns content with html tags
+ if (mp != null) {
+ String[] encoding = new String[mp.getCount()];
+ for (int k = 0, n = mp.getCount(); k < n; k++) {
+ Part part = mp.getBodyPart(k);
+ String disposition = part.getDisposition();
+ if ((disposition != null) &&
+ ((disposition.equals(Part.ATTACHMENT) ||
+ (disposition.equals(Part.INLINE))))) {
+ encoding[k] = part.getFileName().split("\\?")[1];
+
}
}
+ rd.addField(EmailConfig.ENCODING_FIELD, encoding);
}
- } else if (o instanceof String) {
- rd.addField(EmailConfig.EMAIL_BODY, (String)o);
- }
- } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_DATE)) {
- rd.addField(EmailConfig.EMAIL_DATE, sentDate.toString());
- } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_ATTACHMENT_ENCODING)) {
- Multipart mp = (Multipart) msg.getContent();
- if (mp != null) {
- String[] encoding = new String[mp.getCount()];
+ } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_ATTACHMENT_MIMETYPE)) {
+ Multipart mp = (Multipart) msg.getContent();
+ String[] MIMEType = new String[mp.getCount()];
for (int k = 0, n = mp.getCount(); k < n; k++) {
Part part = mp.getBodyPart(k);
String disposition = part.getDisposition();
if ((disposition != null) &&
((disposition.equals(Part.ATTACHMENT) ||
(disposition.equals(Part.INLINE))))) {
- encoding[k] = part.getFileName().split("\\?")[1];
+ MIMEType[k] = part.getContentType();
}
}
- rd.addField(EmailConfig.ENCODING_FIELD, encoding);
+ rd.addField(EmailConfig.MIMETYPE_FIELD, MIMEType);
}
- } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_ATTACHMENT_MIMETYPE)) {
- Multipart mp = (Multipart) msg.getContent();
- String[] MIMEType = new String[mp.getCount()];
- for (int k = 0, n = mp.getCount(); k < n; k++) {
- Part part = mp.getBodyPart(k);
- String disposition = part.getDisposition();
- if ((disposition != null) &&
- ((disposition.equals(Part.ATTACHMENT) ||
- (disposition.equals(Part.INLINE))))) {
- MIMEType[k] = part.getContentType();
-
- }
+ }
+
+ InputStream is = msg.getInputStream();
+ try {
+ rd.setBinary(is, fileLength);
+ activities.ingestDocumentWithException(documentIdentifier, version, msgURL, rd);
+ errorCode = "OK";
+ fileLengthLong = new Long(fileLength);
+ } finally {
+ is.close();
+ }
+
+ // If we're supposed to deal with attachments, this is the time to queue them up
+ if (attachmentUrlTemplate != null) {
+ final Multipart mp = (Multipart) msg.getContent();
+ final int numAttachments = mp.getCount();
+ for (int i = 0; i < numAttachments; i++) {
+ activities.addDocumentReference(documentIdentifier + "/" + i);
}
- rd.addField(EmailConfig.MIMETYPE_FIELD, MIMEType);
}
+
+ } catch (InterruptedException e) {
+ throw new ManifoldCFException(e.getMessage(), ManifoldCFException.INTERRUPTED);
+ } catch (MessagingException e) {
+ errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+ errorDesc = e.getMessage();
+ handleMessagingException(e, "processing email");
+ } catch (IOException e) {
+ errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+ errorDesc = e.getMessage();
+ handleIOException(e, "processing email");
+ throw new ManifoldCFException(e.getMessage(), e);
}
-
- InputStream is = msg.getInputStream();
+ } catch (ManifoldCFException e) {
+ if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+ errorCode = null;
+ throw e;
+ } finally {
+ if (errorCode != null)
+ activities.recordActivity(new Long(startTime),EmailConfig.ACTIVITY_FETCH,
+ fileLengthLong,documentIdentifier,errorCode,errorDesc,null);
+ }
+ } else {
+ // It's a specific attachment
+ final int attachmentNumber = Integer.parseInt(documentIdentifier.substring(attachmentIndex + 1));
+ final String origDocumentIdentifier = documentIdentifier.substring(0, attachmentIndex);
+
+ String versionString = "_" + attachmentUrlTemplate; // NOT empty; we need to make ManifoldCF understand that this is a document that never will change.
+
+ // Check if we need to index
+ if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
+ continue;
+
+ String compositeID = origDocumentIdentifier;
+ String version = versionString;
+ String folderName = extractFolderNameFromDocumentIdentifier(compositeID);
+ String id = extractEmailIDFromDocumentIdentifier(compositeID);
+
+ String errorCode = null;
+ String errorDesc = null;
+ Long fileLengthLong = null;
+ long startTime = System.currentTimeMillis();
+ try {
try {
- rd.setBinary(is, fileLength);
- activities.ingestDocumentWithException(documentIdentifier, version, msgURL, rd);
- errorCode = "OK";
- fileLengthLong = new Long(fileLength);
- } finally {
- is.close();
+ Folder folder = openFolders.get(folderName);
+ if (folder == null)
+ {
+ getSession();
+ OpenFolderThread oft = new OpenFolderThread(session, folderName);
+ oft.start();
+ folder = oft.finishUp();
+ openFolders.put(folderName,folder);
+ }
+
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("Email: Processing document identifier '"
+ + documentIdentifier + "'");
+ SearchTerm messageIDTerm = new MessageIDTerm(id);
+
+ getSession();
+ SearchMessagesThread smt = new SearchMessagesThread(session, folder, messageIDTerm);
+ smt.start();
+ Message[] message = smt.finishUp();
+
+ String msgURL = makeDocumentURI(attachmentUrlTemplate, folderName, id, attachmentNumber);
+
+ Message msg = null;
+ for (Message msg2 : message) {
+ msg = msg2;
+ }
+ if (msg == null) {
+ // email was not found
+ activities.deleteDocument(documentIdentifier);
+ continue;
+ }
+
+ if (!activities.checkURLIndexable(msgURL)) {
+ errorCode = activities.EXCLUDED_URL;
+ errorDesc = "Excluded because of URL ('"+msgURL+"')";
+ activities.noDocument(documentIdentifier, version);
+ continue;
+ }
+
+ final Date sentDate = msg.getSentDate();
+ if (!activities.checkDateIndexable(sentDate)) {
+ errorCode = activities.EXCLUDED_DATE;
+ errorDesc = "Excluded because of date ("+sentDate+")";
+ activities.noDocument(documentIdentifier, version);
+ continue;
+ }
+
+ final Multipart mp = (Multipart) msg.getContent();
+ if (mp.getCount() >= attachmentNumber) {
+ activities.deleteDocument(documentIdentifier);
+ continue;
+ }
+ final Part part = mp.getBodyPart(attachmentNumber);
+
+ final long fileLength = part.getSize();
+ if (!activities.checkLengthIndexable(fileLength)) {
+ errorCode = activities.EXCLUDED_LENGTH;
+ errorDesc = "Excluded because of length ("+fileLength+")";
+ activities.noDocument(documentIdentifier, version);
+ continue;
+ }
+
+ final String mimeType = part.getContentType();
+ if (!activities.checkMimeTypeIndexable(mimeType)) {
+ errorCode = activities.EXCLUDED_MIMETYPE;
+ errorDesc = "Excluded because of mime type ('"+mimeType+"')";
+ activities.noDocument(documentIdentifier, version);
+ continue;
+ }
+
+ RepositoryDocument rd = new RepositoryDocument();
+ rd.setFileName(part.getFileName());
+ rd.setMimeType(mimeType);
+ rd.setCreatedDate(sentDate);
+ rd.setModifiedDate(sentDate);
+
+ final InputStream is = part.getInputStream();
+ try {
+ rd.setBinary(is, fileLength);
+ activities.ingestDocumentWithException(documentIdentifier, version, msgURL, rd);
+ errorCode = "OK";
+ fileLengthLong = new Long(fileLength);
+ } finally {
+ is.close();
+ }
+
+ } catch (InterruptedException e) {
+ throw new ManifoldCFException(e.getMessage(), ManifoldCFException.INTERRUPTED);
+ } catch (MessagingException e) {
+ errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+ errorDesc = e.getMessage();
+ handleMessagingException(e, "processing email attachment");
+ } catch (IOException e) {
+ errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+ errorDesc = e.getMessage();
+ handleIOException(e, "processing email attachment");
+ throw new ManifoldCFException(e.getMessage(), e);
}
- } catch (InterruptedException e) {
- throw new ManifoldCFException(e.getMessage(), ManifoldCFException.INTERRUPTED);
- } catch (MessagingException e) {
- errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
- errorDesc = e.getMessage();
- handleMessagingException(e, "processing email");
- } catch (IOException e) {
- errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
- errorDesc = e.getMessage();
- handleIOException(e, "processing email");
- throw new ManifoldCFException(e.getMessage(), e);
+ } catch (ManifoldCFException e) {
+ if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+ errorCode = null;
+ throw e;
+ } finally {
+ if (errorCode != null)
+ activities.recordActivity(new Long(startTime),EmailConfig.ACTIVITY_FETCH,
+ fileLengthLong,documentIdentifier,errorCode,errorDesc,null);
}
- } catch (ManifoldCFException e) {
- if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
- errorCode = null;
- throw e;
- } finally {
- if (errorCode != null)
- activities.recordActivity(new Long(startTime),EmailConfig.ACTIVITY_FETCH,
- fileLengthLong,documentIdentifier,errorCode,errorDesc,null);
+
}
}
}
@@ -805,10 +952,19 @@ public class EmailConnector extends org.
private static void fillInURLConfigurationMap(Map<String, Object> paramMap, IPasswordMapperActivity mapper, ConfigParams parameters) {
String urlTemplate = parameters.getParameter(EmailConfig.URL_PARAM);
- if (urlTemplate == null)
+ if (urlTemplate == null) {
urlTemplate = "http://sampleserver/$(FOLDERNAME)?id=$(MESSAGEID)";
+ }
paramMap.put("URL", urlTemplate);
+
+ String attachmentUrlTemplate = parameters.getParameter(EmailConfig.ATTACHMENT_URL_PARAM);
+
+ if (attachmentUrlTemplate == null) {
+ attachmentUrlTemplate = "http://sampleserver/$(FOLDERNAME)?id=$(MESSAGEID)&attach=$(ATTACHMENTNUMBER)";
+ }
+
+ paramMap.put("ATTACHMENTURL", attachmentUrlTemplate);
}
/**
@@ -833,6 +989,10 @@ public class EmailConnector extends org.
if (urlTemplate != null)
parameters.setParameter(EmailConfig.URL_PARAM, urlTemplate);
+ String attachmentUrlTemplate = variableContext.getParameter("attachmenturl");
+ if (attachmentUrlTemplate != null)
+ parameters.setParameter(EmailConfig.ATTACHMENT_URL_PARAM, attachmentUrlTemplate);
+
String userName = variableContext.getParameter("username");
if (userName != null)
parameters.setParameter(EmailConfig.USERNAME_PARAM, userName);
@@ -1210,6 +1370,20 @@ public class EmailConnector extends org.
return substitute(urlTemplate, subsMap);
}
+ /** Create an attachment's URI by filling the given URL template with the URL-encoded folder name, the URL-encoded message ID, and the attachment number. */
+ protected static String makeDocumentURI(String urlTemplate, String folderName, String id, int attachmentNumber)
+ {
+ // URL-encode the folder name and message ID (the attachment number is inserted as plain decimal digits). NOTE(review): single-argument URLEncoder.encode is deprecated and uses the platform default charset -- confirm this is intended.
+ String encodedFolderName = URLEncoder.encode(folderName);
+ String encodedId = URLEncoder.encode(id);
+ // The template is already URL encoded, except for the substitution points, which are keyed FOLDERNAME, MESSAGEID and ATTACHMENTNUMBER
+ Map<String,String> subsMap = new HashMap<String,String>();
+ subsMap.put("FOLDERNAME", encodedFolderName);
+ subsMap.put("MESSAGEID", encodedId);
+ subsMap.put("ATTACHMENTNUMBER", Integer.toString(attachmentNumber));
+ return substitute(urlTemplate, subsMap);
+ }
+
protected static String substitute(String template, Map<String,String> map)
{
StringBuilder sb = new StringBuilder();
Modified: manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_en_US.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_en_US.properties?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_en_US.properties (original)
+++ manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_en_US.properties Tue Feb 7 19:37:18 2017
@@ -47,8 +47,7 @@ EmailConnector.MetadataName=Metadata nam
EmailConnector.NoMetadataSpecified=No metadata specified
EmailConnector.SelectMetadataName=--Select metadata name --
EmailConnector.IncludedMetadataColon=Included metadata:
-
-
+EmailConnector.AttachmentURLTemplateColon=Attachment URL template (blank if no attachments desired):
Modified: manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_es_ES.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_es_ES.properties?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_es_ES.properties (original)
+++ manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_es_ES.properties Tue Feb 7 19:37:18 2017
@@ -19,27 +19,27 @@ EmailConnector.Metadata=metadatos
EmailConnector.Filter=Filtro
EmailConnector.EnterAMailServerHostName=Introduzca un nombre de host del servidor de correo
-EmailConnector.PleaseSelectAConfigurationParameterName=Por favor seleccione un nombre de par�metro de configuraci�n
+EmailConnector.PleaseSelectAConfigurationParameterName=Por favor seleccione un nombre de parámetro de configuración
EmailConnector.PleaseSelectAMetadataName=Por favor seleccione un nombre de metadatos
EmailConnector.ValueCannotBeBlank=El valor no puede estar en blanco
EmailConnector.URLTemplateCannotBeBlank=URL plantilla no puede estar en blanco
EmailConnector.URLTemplateColon=URL plantilla:
-EmailConnector.ConfigurationPropertiesColon=propiedades de configuraci�n:
+EmailConnector.ConfigurationPropertiesColon=propiedades de configuración:
EmailConnector.ProtocolColon=Protocolo:
EmailConnector.HostNameColon=nombre de host:
EmailConnector.PortColon=Puerto:
EmailConnector.UserNameColon=Nombre de usuario:
-EmailConnector.PasswordColon=Contrase�a:
+EmailConnector.PasswordColon=Contraseña:
EmailConnector.MatchesColon=Correspondecia:
EmailConnector.FoldersColon=Carpetas:
EmailConnector.RecordFilterColon=filtro de registros:
EmailConnector.ServerProperty=propiedad del servidor
EmailConnector.Value=Valor
EmailConnector.NoServerPropertiesSpecified=No hay propiedades de servidor especificados
-EmailConnector.AddNewMatch=A�adir una nueva correspondecia
-EmailConnector.AddNewProperty=A�adir nueva propiedad
-EmailConnector.Add=a�adir
+EmailConnector.AddNewMatch=Añadir una nueva correspondecia
+EmailConnector.AddNewProperty=Añadir nueva propiedad
+EmailConnector.Add=añadir
EmailConnector.DeleteMatchNumber=eliminar correspondencia #
EmailConnector.DeletePropertyNumber=eliminar propiedad #
EmailConnector.Delete=borrar
@@ -47,6 +47,7 @@ EmailConnector.MetadataName=nombre de me
EmailConnector.NoMetadataSpecified=Sin metadatos especificada
EmailConnector.SelectMetadataName=--Seleccione el nombre de metadatos --
EmailConnector.IncludedMetadataColon=metadatos Incluido:
+EmailConnector.AttachmentURLTemplateColon=Attachment URL template (blank if no attachments desired):
Modified: manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_ja_JP.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_ja_JP.properties?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_ja_JP.properties (original)
+++ manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_ja_JP.properties Tue Feb 7 19:37:18 2017
@@ -47,6 +47,7 @@ EmailConnector.MetadataName=Metadata nam
EmailConnector.NoMetadataSpecified=No metadata specified
EmailConnector.SelectMetadataName=--Select metadata name --
EmailConnector.IncludedMetadataColon=Included metadata:
+EmailConnector.AttachmentURLTemplateColon=Attachment URL template (blank if no attachments desired):
Modified: manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_zh_CN.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_zh_CN.properties?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_zh_CN.properties (original)
+++ manifoldcf/trunk/connectors/email/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/email/common_zh_CN.properties Tue Feb 7 19:37:18 2017
@@ -47,6 +47,7 @@ EmailConnector.MetadataName=元数
EmailConnector.NoMetadataSpecified=元数据未指定
EmailConnector.SelectMetadataName=-- 选择元数据名 --
EmailConnector.IncludedMetadataColon=被包含的元数据:
+EmailConnector.AttachmentURLTemplateColon=Attachment URL template (blank if no attachments desired):
Modified: manifoldcf/trunk/connectors/email/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/email/Configuration_URL.html
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/email/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/email/Configuration_URL.html?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/email/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/email/Configuration_URL.html (original)
+++ manifoldcf/trunk/connectors/email/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/email/Configuration_URL.html Tue Feb 7 19:37:18 2017
@@ -27,6 +27,14 @@ limitations under the License.
<nobr><input type="text" name="url" size="60" value="$Encoder.attributeEscape($URL)"/></nobr>
</td>
</tr>
+ <tr>
+ <td class="description">
+ <nobr>$Encoder.bodyEscape($ResourceBundle.getString('EmailConnector.AttachmentURLTemplateColon'))</nobr>
+ </td>
+ <td class="value">
+ <nobr><input type="text" name="attachmenturl" size="60" value="$Encoder.attributeEscape($ATTACHMENTURL)"/></nobr>
+ </td>
+ </tr>
</table>
#else
Modified: manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java?rev=1782061&r1=1782060&r2=1782061&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java (original)
+++ manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java Tue Feb 7 19:37:18 2017
@@ -1100,7 +1100,8 @@ public class HttpPoster
throws IOException
{
ModifiableSolrParams out = new ModifiableSolrParams();
-
+ Logging.ingest.debug("Solr: Writing document '"+documentURI);
+
// Write the id field
writeField(out,LITERAL+idAttributeName,documentURI);
// Write the rest of the attributes
@@ -1179,6 +1180,8 @@ public class HttpPoster
contentStreamUpdateRequest.setParams(out);
contentStreamUpdateRequest.addContentStream(new RepositoryDocumentStream(is,length,contentType,contentName));
+
+ Logging.ingest.debug("Solr: Done writing '"+documentURI+"'");
}
/**
@@ -1196,6 +1199,7 @@ public class HttpPoster
{
String originalFieldName = iter.next();
String fieldName = makeSafeLuceneField(originalFieldName);
+ Logging.ingest.debug("Solr: Saw field '"+originalFieldName+"'; converted to '"+fieldName+"'");
applySingleMapping(originalFieldName, out, fieldName);
}
}