You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@james.apache.org by bt...@apache.org on 2022/05/13 04:43:19 UTC

[james-project] 03/03: JAMES-3719 Reading inputstream might be blocking

This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git

commit 14fb2f6d7c7df15459a87032df5c3a38adbbdb42
Author: Benoit Tellier <bt...@linagora.com>
AuthorDate: Wed May 11 15:15:58 2022 +0700

    JAMES-3719 Reading inputstream might be blocking
    
    Subscribe the reactive text extraction on an elastic scheduler,
    since reading the input stream might perform blocking reads.
---
 .../james/mailbox/extractor/TextExtractor.java       |  4 +++-
 .../store/extractor/DefaultTextExtractor.java        | 14 ++++++++++++++
 .../mailbox/store/extractor/JsoupTextExtractor.java  | 20 ++++++++++++++++++++
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/mailbox/api/src/main/java/org/apache/james/mailbox/extractor/TextExtractor.java b/mailbox/api/src/main/java/org/apache/james/mailbox/extractor/TextExtractor.java
index 2822ee02e8..7891557039 100644
--- a/mailbox/api/src/main/java/org/apache/james/mailbox/extractor/TextExtractor.java
+++ b/mailbox/api/src/main/java/org/apache/james/mailbox/extractor/TextExtractor.java
@@ -24,6 +24,7 @@ import java.io.InputStream;
 import org.apache.james.mailbox.model.ContentType;
 
 import reactor.core.publisher.Mono;
+import reactor.core.scheduler.Schedulers;
 
 public interface TextExtractor {
     default boolean applicable(ContentType contentType) {
@@ -33,7 +34,8 @@ public interface TextExtractor {
     ParsedContent extractContent(InputStream inputStream, ContentType contentType) throws Exception;
 
     default Mono<ParsedContent> extractContentReactive(InputStream inputStream, ContentType contentType) {
-        return Mono.fromCallable(() -> extractContent(inputStream, contentType));
+        return Mono.fromCallable(() -> extractContent(inputStream, contentType))
+            .subscribeOn(Schedulers.elastic());
     }
 
 }
diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java
index 50cc8b68e3..2605f850e6 100644
--- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java
+++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java
@@ -30,6 +30,9 @@ import org.apache.james.mailbox.extractor.ParsedContent;
 import org.apache.james.mailbox.extractor.TextExtractor;
 import org.apache.james.mailbox.model.ContentType;
 
+import reactor.core.publisher.Mono;
+import reactor.core.scheduler.Schedulers;
+
 /**
  * A default text extractor that is directly based on the input file provided.
  * 
@@ -50,4 +53,15 @@ public class DefaultTextExtractor implements TextExtractor {
             return new ParsedContent(Optional.empty(), new HashMap<>());
         }
     }
+
+    @Override
+    public Mono<ParsedContent> extractContentReactive(InputStream inputStream, ContentType contentType) {
+        if (applicable(contentType)) {
+            Charset charset = contentType.charset().orElse(StandardCharsets.UTF_8);
+            return Mono.fromCallable(() -> new ParsedContent(Optional.ofNullable(IOUtils.toString(inputStream, charset)), new HashMap<>()))
+                .subscribeOn(Schedulers.elastic());
+        } else {
+            return Mono.just(new ParsedContent(Optional.empty(), new HashMap<>()));
+        }
+    }
 }
diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/JsoupTextExtractor.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/JsoupTextExtractor.java
index b06f55ffc0..48988c1dd0 100644
--- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/JsoupTextExtractor.java
+++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/JsoupTextExtractor.java
@@ -37,6 +37,9 @@ import org.jsoup.nodes.Document;
 
 import com.google.common.collect.ImmutableMap;
 
+import reactor.core.publisher.Mono;
+import reactor.core.scheduler.Schedulers;
+
 public class JsoupTextExtractor implements TextExtractor {
     private static final String TITLE_HTML_TAG = "title";
     private static final String NO_BASE_URI = "";
@@ -67,6 +70,23 @@ public class JsoupTextExtractor implements TextExtractor {
         return ParsedContent.empty();
     }
 
+    @Override
+    public Mono<ParsedContent> extractContentReactive(InputStream inputStream, ContentType contentType) {
+        if (inputStream == null || contentType == null) {
+            return Mono.just(ParsedContent.empty());
+        }
+        Charset charset = contentType.charset().orElse(StandardCharsets.UTF_8);
+        if (contentType.mimeType().equals(TEXT_HTML)) {
+            return Mono.fromCallable(() -> parseHtmlContent(inputStream, charset))
+                .subscribeOn(Schedulers.elastic());
+        }
+        if (contentType.mimeType().equals(TEXT_PLAIN)) {
+            return Mono.fromCallable(() -> parsePlainTextContent(inputStream, charset))
+                .subscribeOn(Schedulers.elastic());
+        }
+        return Mono.just(ParsedContent.empty());
+    }
+
     private ParsedContent parsePlainTextContent(InputStream inputStream, Charset charset) throws IOException {
         return new ParsedContent(Optional.ofNullable(IOUtils.toString(inputStream, charset)), EMPTY_METADATA);
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@james.apache.org
For additional commands, e-mail: notifications-help@james.apache.org