You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@james.apache.org by bt...@apache.org on 2022/05/13 04:43:19 UTC
[james-project] 03/03: JAMES-3719 Reading inputstream might be blocking
This is an automated email from the ASF dual-hosted git repository.
btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git
commit 14fb2f6d7c7df15459a87032df5c3a38adbbdb42
Author: Benoit Tellier <bt...@linagora.com>
AuthorDate: Wed May 11 15:15:58 2022 +0700
JAMES-3719 Reading inputstream might be blocking
Subscribes the extractContentReactive implementations on the elastic
scheduler wherever a blocking read of the InputStream might be performed.
---
.../james/mailbox/extractor/TextExtractor.java | 4 +++-
.../store/extractor/DefaultTextExtractor.java | 14 ++++++++++++++
.../mailbox/store/extractor/JsoupTextExtractor.java | 20 ++++++++++++++++++++
3 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/mailbox/api/src/main/java/org/apache/james/mailbox/extractor/TextExtractor.java b/mailbox/api/src/main/java/org/apache/james/mailbox/extractor/TextExtractor.java
index 2822ee02e8..7891557039 100644
--- a/mailbox/api/src/main/java/org/apache/james/mailbox/extractor/TextExtractor.java
+++ b/mailbox/api/src/main/java/org/apache/james/mailbox/extractor/TextExtractor.java
@@ -24,6 +24,7 @@ import java.io.InputStream;
import org.apache.james.mailbox.model.ContentType;
import reactor.core.publisher.Mono;
+import reactor.core.scheduler.Schedulers;
public interface TextExtractor {
default boolean applicable(ContentType contentType) {
@@ -33,7 +34,8 @@ public interface TextExtractor {
ParsedContent extractContent(InputStream inputStream, ContentType contentType) throws Exception;
default Mono<ParsedContent> extractContentReactive(InputStream inputStream, ContentType contentType) {
- return Mono.fromCallable(() -> extractContent(inputStream, contentType));
+ return Mono.fromCallable(() -> extractContent(inputStream, contentType))
+ .subscribeOn(Schedulers.elastic());
}
}
diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java
index 50cc8b68e3..2605f850e6 100644
--- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java
+++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java
@@ -30,6 +30,9 @@ import org.apache.james.mailbox.extractor.ParsedContent;
import org.apache.james.mailbox.extractor.TextExtractor;
import org.apache.james.mailbox.model.ContentType;
+import reactor.core.publisher.Mono;
+import reactor.core.scheduler.Schedulers;
+
/**
* A default text extractor that is directly based on the input file provided.
*
@@ -50,4 +53,15 @@ public class DefaultTextExtractor implements TextExtractor {
return new ParsedContent(Optional.empty(), new HashMap<>());
}
}
+
+ @Override
+ public Mono<ParsedContent> extractContentReactive(InputStream inputStream, ContentType contentType) {
+ if (applicable(contentType)) {
+ Charset charset = contentType.charset().orElse(StandardCharsets.UTF_8);
+ return Mono.fromCallable(() -> new ParsedContent(Optional.ofNullable(IOUtils.toString(inputStream, charset)), new HashMap<>()))
+ .subscribeOn(Schedulers.elastic());
+ } else {
+ return Mono.just(new ParsedContent(Optional.empty(), new HashMap<>()));
+ }
+ }
}
diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/JsoupTextExtractor.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/JsoupTextExtractor.java
index b06f55ffc0..48988c1dd0 100644
--- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/JsoupTextExtractor.java
+++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/JsoupTextExtractor.java
@@ -37,6 +37,9 @@ import org.jsoup.nodes.Document;
import com.google.common.collect.ImmutableMap;
+import reactor.core.publisher.Mono;
+import reactor.core.scheduler.Schedulers;
+
public class JsoupTextExtractor implements TextExtractor {
private static final String TITLE_HTML_TAG = "title";
private static final String NO_BASE_URI = "";
@@ -67,6 +70,23 @@ public class JsoupTextExtractor implements TextExtractor {
return ParsedContent.empty();
}
+ @Override
+ public Mono<ParsedContent> extractContentReactive(InputStream inputStream, ContentType contentType) {
+ if (inputStream == null || contentType == null) {
+ return Mono.just(ParsedContent.empty());
+ }
+ Charset charset = contentType.charset().orElse(StandardCharsets.UTF_8);
+ if (contentType.mimeType().equals(TEXT_HTML)) {
+ return Mono.fromCallable(() -> parseHtmlContent(inputStream, charset))
+ .subscribeOn(Schedulers.elastic());
+ }
+ if (contentType.mimeType().equals(TEXT_PLAIN)) {
+ return Mono.fromCallable(() -> parsePlainTextContent(inputStream, charset))
+ .subscribeOn(Schedulers.elastic());
+ }
+ return Mono.just(ParsedContent.empty());
+ }
+
private ParsedContent parsePlainTextContent(InputStream inputStream, Charset charset) throws IOException {
return new ParsedContent(Optional.ofNullable(IOUtils.toString(inputStream, charset)), EMPTY_METADATA);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@james.apache.org
For additional commands, e-mail: notifications-help@james.apache.org