You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by bt...@apache.org on 2018/05/31 01:53:50 UTC

[11/14] james-project git commit: MAILBOX-338 Implement a cache for TikaTextExtractor

MAILBOX-338 Implement a cache for TikaTextExtractor


Project: http://git-wip-us.apache.org/repos/asf/james-project/repo
Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/6d3396ef
Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/6d3396ef
Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/6d3396ef

Branch: refs/heads/master
Commit: 6d3396efb096665c1fae01b13b292d49b3840888
Parents: 918c86f
Author: benwa <bt...@linagora.com>
Authored: Sun May 27 13:26:43 2018 +0700
Committer: benwa <bt...@linagora.com>
Committed: Thu May 31 08:53:16 2018 +0700

----------------------------------------------------------------------
 .../mailbox/tika/CachingTextExtractor.java      |  84 +++++++++
 .../james/mailbox/tika/TikaConfiguration.java   |  66 ++++++-
 .../mailbox/tika/CachingTextExtractorTest.java  |  98 ++++++++++
 .../mailbox/TikaConfigurationReader.java        |  64 +++++++
 .../modules/mailbox/TikaMailboxModule.java      |  30 ++--
 .../mailbox/TikaConfigurationReaderTest.java    | 178 +++++++++++++++++++
 6 files changed, 504 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/james-project/blob/6d3396ef/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/CachingTextExtractor.java
----------------------------------------------------------------------
diff --git a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/CachingTextExtractor.java b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/CachingTextExtractor.java
new file mode 100644
index 0000000..8bfed1b
--- /dev/null
+++ b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/CachingTextExtractor.java
@@ -0,0 +1,84 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.mailbox.tika;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.time.Duration;
+import java.util.Optional;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.james.mailbox.extractor.ParsedContent;
+import org.apache.james.mailbox.extractor.TextExtractor;
+
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.Weigher;
+import com.google.common.util.concurrent.UncheckedExecutionException;
+
+
+/**
+ * {@link TextExtractor} decorator keeping extraction results in an in-memory Guava cache.
+ *
+ * Entries are keyed by the SHA-256 hex digest of the input bytes, expire once they have not been
+ * accessed for the configured period, and are bounded by the cumulated size of the cached texts.
+ */
+public class CachingTextExtractor implements TextExtractor {
+    private static final long BYTES_PER_UTF16_CHAR = 2;
+
+    private final TextExtractor underlying;
+    private final Cache<String, ParsedContent> cache;
+
+    /**
+     * @param underlying extractor called when no cached result exists for the input
+     * @param cacheEvictionPeriod time without access after which an entry expires
+     * @param cacheWeightInBytes upper bound for the sum of the entry weights (see {@code getSize})
+     */
+    public CachingTextExtractor(TextExtractor underlying, Duration cacheEvictionPeriod, Long cacheWeightInBytes) {
+        this.underlying = underlying;
+
+        Weigher<String, ParsedContent> weigher = (key, parsedContent) -> getSize(parsedContent);
+        // CacheBuilder.newBuilder() is not generic: key and value types come from the weigher.
+        this.cache = CacheBuilder.newBuilder()
+            .expireAfterAccess(cacheEvictionPeriod.toMillis(), TimeUnit.MILLISECONDS)
+            .maximumWeight(cacheWeightInBytes)
+            .weigher(weigher)
+            .build();
+    }
+
+    /**
+     * Weight of a cache entry: twice the length of its textual content, 0 when there is none.
+     * Metadata is not accounted for.
+     */
+    private int getSize(ParsedContent parsedContent) {
+        return parsedContent.getTextualContent()
+            .map(String::length)
+            .map(this::utf16LengthToBytesCount)
+            .orElse(0);
+    }
+
+    /**
+     * Converts a number of UTF-16 chars into a number of bytes.
+     *
+     * The product is computed as a long and capped at {@link Integer#MAX_VALUE}: an int overflow
+     * would yield a negative weight, which the cache refuses.
+     */
+    private int utf16LengthToBytesCount(Integer value) {
+        long bytes = BYTES_PER_UTF16_CHAR * value.longValue();
+        return (int) Math.min(Integer.MAX_VALUE, bytes);
+    }
+
+    /**
+     * Returns the cached result for this content, delegating to the underlying extractor on a miss.
+     *
+     * The whole stream is read in memory in order to compute the cache key.
+     * NOTE(review): {@code contentType} is not part of the key, so identical bytes submitted with
+     * different content types share a single entry - confirm this is intended.
+     *
+     * @throws Exception the exception raised by the underlying extractor, without the cache wrapper
+     */
+    @Override
+    public ParsedContent extractContent(InputStream inputStream, String contentType) throws Exception {
+        byte[] bytes = IOUtils.toByteArray(inputStream);
+        String key = DigestUtils.sha256Hex(bytes);
+        try {
+            return cache.get(key,
+                () -> underlying.extractContent(new ByteArrayInputStream(bytes), contentType));
+        } catch (UncheckedExecutionException | ExecutionException e) {
+            throw unwrap(e);
+        }
+    }
+
+    /**
+     * Returns the cause of the given exception when that cause is an {@link Exception},
+     * the given exception itself otherwise.
+     */
+    private Exception unwrap(Exception e) {
+        return Optional.ofNullable(e.getCause())
+            .filter(Exception.class::isInstance)
+            .map(Exception.class::cast)
+            .orElse(e);
+    }
+}

http://git-wip-us.apache.org/repos/asf/james-project/blob/6d3396ef/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java
----------------------------------------------------------------------
diff --git a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java
index 633afc3..e7994b7 100644
--- a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java
+++ b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java
@@ -19,6 +19,8 @@
 
 package org.apache.james.mailbox.tika;
 
+import java.time.Duration;
+import java.util.Objects;
 import java.util.Optional;
 
 import org.apache.james.util.Port;
@@ -36,11 +38,15 @@ public class TikaConfiguration {
         private Optional<String> host;
         private Optional<Integer> port;
         private Optional<Integer> timeoutInMillis;
+        private Optional<Duration> cacheEvictionPeriod;
+        private Optional<Long> cacheWeightInBytes;
 
         private Builder() {
             host = Optional.empty();
             port = Optional.empty();
             timeoutInMillis = Optional.empty();
+            cacheEvictionPeriod = Optional.empty();
+            cacheWeightInBytes = Optional.empty();
         }
 
         public Builder host(String host) {
@@ -58,24 +64,52 @@ public class TikaConfiguration {
             return this;
         }
 
+        /**
+         * Sets the period after which an unused cache entry is evicted.
+         *
+         * @throws NullPointerException when {@code duration} is null
+         * @throws IllegalArgumentException when {@code duration} is negative
+         */
+        public Builder cacheEvictionPeriod(Duration duration) {
+            return cacheEvictionPeriod(Optional.of(duration));
+        }
+
+        /**
+         * Sets the period after which an unused cache entry is evicted; when empty,
+         * {@code build()} falls back to the default period.
+         *
+         * @throws NullPointerException when {@code duration} is null
+         * @throws IllegalArgumentException when the supplied duration is negative
+         */
+        public Builder cacheEvictionPeriod(Optional<Duration> duration) {
+            Preconditions.checkNotNull(duration, "'cacheEvictionPeriod' should not be null");
+            // Fail here with an explicit message rather than later, when the cache gets created.
+            duration.ifPresent(value ->
+                Preconditions.checkArgument(!value.isNegative(), "'cacheEvictionPeriod' should not be negative"));
+            this.cacheEvictionPeriod = duration;
+            return this;
+        }
+
+        /**
+         * Sets the maximum weight of the cache, in bytes.
+         *
+         * @throws IllegalArgumentException when {@code weight} is negative
+         */
+        public Builder cacheWeightInBytes(long weight) {
+            return cacheWeightInBytes(Optional.of(weight));
+        }
+
+        /**
+         * Sets the maximum weight of the cache, in bytes; when empty, {@code build()} falls back
+         * to the default limit.
+         *
+         * @throws NullPointerException when {@code weight} is null
+         * @throws IllegalArgumentException when the supplied weight is negative
+         */
+        public Builder cacheWeightInBytes(Optional<Long> weight) {
+            Preconditions.checkNotNull(weight, "'cacheWeightInBytes' should not be null");
+            // Fail here with an explicit message rather than later, when the cache gets created.
+            weight.ifPresent(value ->
+                Preconditions.checkArgument(value >= 0, "'cacheWeightInBytes' should not be negative"));
+            this.cacheWeightInBytes = weight;
+            return this;
+        }
+
         public TikaConfiguration build() {
             Preconditions.checkState(host.isPresent(), "'host' is mandatory");
             Preconditions.checkState(port.isPresent(), "'port' is mandatory");
             Preconditions.checkState(timeoutInMillis.isPresent(), "'timeoutInMillis' is mandatory");
             Port.assertValid(port.get());
 
-            return new TikaConfiguration(host.get(), port.get(), timeoutInMillis.get());
+            return new TikaConfiguration(host.get(), port.get(), timeoutInMillis.get(),
+                cacheEvictionPeriod.orElse(DEFAULT_CACHE_EVICTION_PERIOD),
+                cacheWeightInBytes.orElse(DEFAULT_CACHE_LIMIT_100_MB));
         }
     }
+    public static final long DEFAULT_CACHE_LIMIT_100_MB = 1024L * 1024L * 100L;
+    public static final Duration DEFAULT_CACHE_EVICTION_PERIOD = Duration.ofDays(1);
 
     private final String host;
     private final int port;
     private final int timeoutInMillis;
+    private final Duration cacheEvictionPeriod;
+    private final long cacheWeightInBytes;
 
-    private TikaConfiguration(String host, int port, int timeoutInMillis) {
+    private TikaConfiguration(String host, int port, int timeoutInMillis, Duration cacheEvictionPeriod, long cacheWeightInBytes) {
         this.host = host;
         this.port = port;
         this.timeoutInMillis = timeoutInMillis;
+        this.cacheEvictionPeriod = cacheEvictionPeriod;
+        this.cacheWeightInBytes = cacheWeightInBytes;
     }
 
     public String getHost() {
@@ -89,4 +123,32 @@ public class TikaConfiguration {
     public int getTimeoutInMillis() {
         return timeoutInMillis;
     }
+
+    /**
+     * @return the cache eviction period this configuration was built with
+     */
+    public Duration getCacheEvictionPeriod() {
+        return cacheEvictionPeriod;
+    }
+
+    /**
+     * @return the maximum cache weight, in bytes, this configuration was built with
+     */
+    public long getCacheWeightInBytes() {
+        return cacheWeightInBytes;
+    }
+
+    @Override
+    public final boolean equals(Object o) {
+        if (o instanceof TikaConfiguration) {
+            TikaConfiguration that = (TikaConfiguration) o;
+
+            return Objects.equals(this.port, that.port)
+                && Objects.equals(this.timeoutInMillis, that.timeoutInMillis)
+                && Objects.equals(this.cacheWeightInBytes, that.cacheWeightInBytes)
+                && Objects.equals(this.host, that.host)
+                && Objects.equals(this.cacheEvictionPeriod, that.cacheEvictionPeriod);
+        }
+        return false;
+    }
+
+    @Override
+    public final int hashCode() {
+        return Objects.hash(host, port, timeoutInMillis, cacheEvictionPeriod, cacheWeightInBytes);
+    }
+
 }

http://git-wip-us.apache.org/repos/asf/james-project/blob/6d3396ef/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/CachingTextExtractorTest.java
----------------------------------------------------------------------
diff --git a/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/CachingTextExtractorTest.java b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/CachingTextExtractorTest.java
new file mode 100644
index 0000000..154a5ff
--- /dev/null
+++ b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/CachingTextExtractorTest.java
@@ -0,0 +1,98 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.mailbox.tika;
+
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.function.Supplier;
+
+import org.apache.james.mailbox.extractor.ParsedContent;
+import org.apache.james.mailbox.extractor.TextExtractor;
+import org.junit.Before;
+import org.junit.Test;
+import org.testcontainers.shaded.com.google.common.collect.ImmutableMap;
+
+/**
+ * Checks that {@link CachingTextExtractor} calls the wrapped extractor once per distinct content
+ * and rethrows the failures of that extractor unchanged.
+ */
+public class CachingTextExtractorTest {
+
+    public static final ParsedContent RESULT = new ParsedContent("content", ImmutableMap.of());
+    public static final Supplier<InputStream> INPUT_STREAM_1 = () -> new ByteArrayInputStream("content1".getBytes(StandardCharsets.UTF_8));
+
+    private static final String CONTENT_TYPE = "application/bytes";
+
+    private TextExtractor textExtractor;
+    private TextExtractor wrappedTextExtractor;
+
+    @Before
+    public void setUp() throws Exception {
+        wrappedTextExtractor = mock(TextExtractor.class);
+        when(wrappedTextExtractor.extractContent(any(), any()))
+            .thenReturn(RESULT);
+
+        textExtractor = new CachingTextExtractor(wrappedTextExtractor,
+            TikaConfiguration.DEFAULT_CACHE_EVICTION_PERIOD,
+            TikaConfiguration.DEFAULT_CACHE_LIMIT_100_MB);
+    }
+
+    @Test
+    public void extractContentShouldCallUnderlyingTextExtractor() throws Exception {
+        extractSample();
+
+        verify(wrappedTextExtractor, times(1)).extractContent(any(), any());
+        verifyNoMoreInteractions(wrappedTextExtractor);
+    }
+
+    @Test
+    public void extractContentShouldAvoidCallingUnderlyingTextExtractorWhenPossible() throws Exception {
+        // Same bytes twice: the second call is expected to be served without delegation.
+        extractSample();
+        extractSample();
+
+        verify(wrappedTextExtractor, times(1)).extractContent(any(), any());
+        verifyNoMoreInteractions(wrappedTextExtractor);
+    }
+
+    @Test
+    public void extractContentShouldPropagateCheckedException() throws Exception {
+        IOException failure = new IOException("Any");
+        when(wrappedTextExtractor.extractContent(any(), any()))
+            .thenThrow(failure);
+
+        assertThatThrownBy(() -> extractSample())
+            .isEqualTo(failure);
+    }
+
+    @Test
+    public void extractContentShouldPropagateRuntimeException() throws Exception {
+        RuntimeException failure = new RuntimeException("Any");
+        when(wrappedTextExtractor.extractContent(any(), any()))
+            .thenThrow(failure);
+
+        assertThatThrownBy(() -> extractSample())
+            .isEqualTo(failure);
+    }
+
+    /** Runs the tested extractor on a fresh stream over the sample content. */
+    private ParsedContent extractSample() throws Exception {
+        return textExtractor.extractContent(INPUT_STREAM_1.get(), CONTENT_TYPE);
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/james-project/blob/6d3396ef/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaConfigurationReader.java
----------------------------------------------------------------------
diff --git a/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaConfigurationReader.java b/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaConfigurationReader.java
new file mode 100644
index 0000000..13ca0cd
--- /dev/null
+++ b/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaConfigurationReader.java
@@ -0,0 +1,64 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.modules.mailbox;
+
+import java.time.Duration;
+import java.util.Optional;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.configuration.PropertiesConfiguration;
+import org.apache.james.mailbox.tika.TikaConfiguration;
+import org.apache.james.util.Size;
+import org.apache.james.util.TimeConverter;
+
+import com.github.fge.lambdas.Throwing;
+import com.google.common.primitives.Ints;
+
+public class TikaConfigurationReader {
+    public static final String TIKA_HOST = "tika.host";
+    public static final String TIKA_PORT = "tika.port";
+    public static final String TIKA_TIMEOUT_IN_MS = "tika.timeoutInMillis";
+    public static final String DEFAULT_HOST = "127.0.0.1";
+    public static final int DEFAULT_PORT = 9998;
+    public static final String TIKA_CACHE_EVICTION_PERIOD = "tika.cache.eviction.period";
+    public static final String TIKA_CACHE_WEIGHT_MAX = "tika.cache.weight.max";
+    public static final int DEFAULT_TIMEOUT_IN_MS = Ints.checkedCast(TimeUnit.SECONDS.toMillis(30));
+
+    public static TikaConfiguration readTikaConfiguration(PropertiesConfiguration configuration) {
+        Optional<Duration> cacheEvictionPeriod = Optional.ofNullable(
+            configuration.getString(TIKA_CACHE_EVICTION_PERIOD,
+                null))
+            .map(rawString -> TimeConverter.getMilliSeconds(rawString, TimeConverter.Unit.SECONDS))
+            .map(Duration::ofMillis);
+
+        Optional<Long> cacheWeight = Optional.ofNullable(
+            configuration.getString(TIKA_CACHE_WEIGHT_MAX, null))
+            .map(Throwing.function(Size::parse))
+            .map(Size::asBytes);
+
+        return TikaConfiguration.builder()
+            .host(configuration.getString(TIKA_HOST, DEFAULT_HOST))
+            .port(configuration.getInt(TIKA_PORT, DEFAULT_PORT))
+            .timeoutInMillis(configuration.getInt(TIKA_TIMEOUT_IN_MS, DEFAULT_TIMEOUT_IN_MS))
+            .cacheEvictionPeriod(cacheEvictionPeriod)
+            .cacheWeightInBytes(cacheWeight)
+            .build();
+    }
+}

http://git-wip-us.apache.org/repos/asf/james-project/blob/6d3396ef/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaMailboxModule.java
----------------------------------------------------------------------
diff --git a/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaMailboxModule.java b/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaMailboxModule.java
index c9cc6b8..6fcd505 100644
--- a/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaMailboxModule.java
+++ b/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaMailboxModule.java
@@ -19,15 +19,19 @@
 
 package org.apache.james.modules.mailbox;
 
+import static org.apache.james.modules.mailbox.TikaConfigurationReader.DEFAULT_HOST;
+import static org.apache.james.modules.mailbox.TikaConfigurationReader.DEFAULT_PORT;
+import static org.apache.james.modules.mailbox.TikaConfigurationReader.DEFAULT_TIMEOUT_IN_MS;
+
 import java.io.FileNotFoundException;
 import java.net.URISyntaxException;
-import java.util.concurrent.TimeUnit;
 
 import javax.inject.Singleton;
 
 import org.apache.commons.configuration.ConfigurationException;
 import org.apache.commons.configuration.PropertiesConfiguration;
 import org.apache.james.mailbox.extractor.TextExtractor;
+import org.apache.james.mailbox.tika.CachingTextExtractor;
 import org.apache.james.mailbox.tika.TikaConfiguration;
 import org.apache.james.mailbox.tika.TikaHttpClient;
 import org.apache.james.mailbox.tika.TikaHttpClientImpl;
@@ -36,7 +40,6 @@ import org.apache.james.utils.PropertiesProvider;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.google.common.primitives.Ints;
 import com.google.inject.AbstractModule;
 import com.google.inject.Provides;
 import com.google.inject.Scopes;
@@ -46,18 +49,11 @@ public class TikaMailboxModule extends AbstractModule {
     private static final Logger LOGGER = LoggerFactory.getLogger(TikaMailboxModule.class);
 
     private static final String TIKA_CONFIGURATION_NAME = "tika";
-    private static final String TIKA_HOST = "tika.host";
-    private static final String TIKA_PORT = "tika.port";
-    private static final String TIKA_TIMEOUT_IN_MS = "tika.timeoutInMillis";
 
-    private static final String DEFAULT_HOST = "127.0.0.1";
-    private static final int DEFAULT_PORT = 9998;
-    private static final int DEFAULT_TIMEOUT_IN_MS = Ints.checkedCast(TimeUnit.SECONDS.toMillis(30));
 
     @Override
     protected void configure() {
         bind(TikaTextExtractor.class).in(Scopes.SINGLETON);
-        bind(TextExtractor.class).to(TikaTextExtractor.class);
     }
 
     @Provides
@@ -71,11 +67,8 @@ public class TikaMailboxModule extends AbstractModule {
     private TikaConfiguration getTikaConfiguration(PropertiesProvider propertiesProvider) throws ConfigurationException {
         try {
             PropertiesConfiguration configuration = propertiesProvider.getConfiguration(TIKA_CONFIGURATION_NAME);
-            return TikaConfiguration.builder()
-                    .host(configuration.getString(TIKA_HOST, DEFAULT_HOST))
-                    .port(configuration.getInt(TIKA_PORT, DEFAULT_PORT))
-                    .timeoutInMillis(configuration.getInt(TIKA_TIMEOUT_IN_MS, DEFAULT_TIMEOUT_IN_MS))
-                    .build();
+
+            return TikaConfigurationReader.readTikaConfiguration(configuration);
         } catch (FileNotFoundException e) {
             LOGGER.warn("Could not find {} configuration file. Using {}:{} as contact point", TIKA_CONFIGURATION_NAME, DEFAULT_HOST, DEFAULT_PORT);
             return TikaConfiguration.builder()
@@ -86,4 +79,13 @@ public class TikaMailboxModule extends AbstractModule {
         }
     }
 
+    /**
+     * Provides the {@link TextExtractor} binding: the Tika extractor wrapped in a cache set up
+     * with the eviction period and weight limit of the Tika configuration.
+     */
+    @Provides
+    @Singleton
+    private TextExtractor provideTextExtractor(TikaTextExtractor textExtractor, TikaConfiguration configuration) {
+        long maximumWeightInBytes = configuration.getCacheWeightInBytes();
+        TextExtractor cachingExtractor = new CachingTextExtractor(
+            textExtractor,
+            configuration.getCacheEvictionPeriod(),
+            maximumWeightInBytes);
+        return cachingExtractor;
+    }
+
 }

http://git-wip-us.apache.org/repos/asf/james-project/blob/6d3396ef/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/mailbox/TikaConfigurationReaderTest.java
----------------------------------------------------------------------
diff --git a/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/mailbox/TikaConfigurationReaderTest.java b/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/mailbox/TikaConfigurationReaderTest.java
new file mode 100644
index 0000000..484c8f0
--- /dev/null
+++ b/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/mailbox/TikaConfigurationReaderTest.java
@@ -0,0 +1,178 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.modules.mailbox;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.io.StringReader;
+import java.time.Duration;
+
+import org.apache.commons.configuration.PropertiesConfiguration;
+import org.apache.james.mailbox.tika.TikaConfiguration;
+import org.junit.Test;
+
+public class TikaConfigurationReaderTest {
+
+    @Test
+    public void readTikaConfigurationShouldAcceptMandatoryValues() throws Exception {
+        PropertiesConfiguration configuration = new PropertiesConfiguration();
+        configuration.load(new StringReader(
+            "tika.host=172.0.0.5\n" +
+                "tika.port=889\n" +
+                "tika.timeoutInMillis=500\n"));
+
+        assertThat(TikaConfigurationReader.readTikaConfiguration(configuration))
+            .isEqualTo(
+                TikaConfiguration.builder()
+                    .host("172.0.0.5")
+                    .port(889)
+                    .timeoutInMillis(500)
+                    .cacheWeightInBytes(100L * 1024L *1024L)
+                    .cacheEvictionPeriod(Duration.ofDays(1))
+                    .build());
+    }
+
+    @Test
+    public void readTikaConfigurationShouldReturnDefaultOnMissingHost() throws Exception {
+        PropertiesConfiguration configuration = new PropertiesConfiguration();
+        configuration.load(new StringReader(
+                "tika.port=889\n" +
+                "tika.timeoutInMillis=500\n"));
+
+        assertThat(TikaConfigurationReader.readTikaConfiguration(configuration))
+            .isEqualTo(
+                TikaConfiguration.builder()
+                    .host("127.0.0.1")
+                    .port(889)
+                    .timeoutInMillis(500)
+                    .build());
+    }
+
+    @Test
+    public void readTikaConfigurationShouldReturnDefaultOnMissingPort() throws Exception {
+        PropertiesConfiguration configuration = new PropertiesConfiguration();
+        configuration.load(new StringReader(
+            "tika.host=172.0.0.5\n" +
+                "tika.timeoutInMillis=500\n"));
+
+        assertThat(TikaConfigurationReader.readTikaConfiguration(configuration))
+            .isEqualTo(
+                TikaConfiguration.builder()
+                    .host("172.0.0.5")
+                    .port(9998)
+                    .timeoutInMillis(500)
+                    .build());
+    }
+
+    @Test
+    public void readTikaConfigurationShouldReturnDefaultOnMissingTimeout() throws Exception {
+        PropertiesConfiguration configuration = new PropertiesConfiguration();
+        configuration.load(new StringReader(
+            "tika.host=172.0.0.5\n" +
+                "tika.port=889\n"));
+
+        assertThat(TikaConfigurationReader.readTikaConfiguration(configuration))
+            .isEqualTo(
+                TikaConfiguration.builder()
+                    .host("172.0.0.5")
+                    .port(889)
+                    .timeoutInMillis(30 * 1000)
+                    .build());
+    }
+
+
+    @Test
+    public void readTikaConfigurationShouldParseUnitForCacheEvictionPeriod() throws Exception {
+        PropertiesConfiguration configuration = new PropertiesConfiguration();
+        configuration.load(new StringReader(
+            "tika.host=172.0.0.5\n" +
+                "tika.port=889\n" +
+                "tika.timeoutInMillis=500\n" +
+                "tika.cache.eviction.period=2H"));
+
+        assertThat(TikaConfigurationReader.readTikaConfiguration(configuration))
+            .isEqualTo(
+                TikaConfiguration.builder()
+                    .host("172.0.0.5")
+                    .port(889)
+                    .timeoutInMillis(500)
+                    .cacheEvictionPeriod(Duration.ofHours(2))
+                    .build());
+    }
+
+    @Test
+    public void readTikaConfigurationShouldDefaultToSecondWhenMissingUnitForCacheEvitionPeriod() throws Exception {
+        PropertiesConfiguration configuration = new PropertiesConfiguration();
+        configuration.load(new StringReader(
+            "tika.host=172.0.0.5\n" +
+                "tika.port=889\n" +
+                "tika.timeoutInMillis=500\n" +
+                "tika.cache.eviction.period=3600"));
+
+        assertThat(TikaConfigurationReader.readTikaConfiguration(configuration))
+            .isEqualTo(
+                TikaConfiguration.builder()
+                    .host("172.0.0.5")
+                    .port(889)
+                    .timeoutInMillis(500)
+                    .cacheEvictionPeriod(Duration.ofHours(1))
+                    .build());
+    }
+
+    @Test
+    public void readTikaConfigurationShouldParseUnitForCacheWeightMax() throws Exception {
+        PropertiesConfiguration configuration = new PropertiesConfiguration();
+        configuration.load(new StringReader(
+            "tika.host=172.0.0.5\n" +
+                "tika.port=889\n" +
+                "tika.timeoutInMillis=500\n" +
+                "tika.cache.weight.max=200M"));
+
+        assertThat(TikaConfigurationReader.readTikaConfiguration(configuration))
+            .isEqualTo(
+                TikaConfiguration.builder()
+                    .host("172.0.0.5")
+                    .port(889)
+                    .timeoutInMillis(500)
+                    .cacheWeightInBytes(200L * 1024L * 1024L)
+                    .build());
+    }
+
+    @Test
+    public void readTikaConfigurationShouldDefaultToByteAsSizeUnit() throws Exception {
+        PropertiesConfiguration configuration = new PropertiesConfiguration();
+        configuration.load(new StringReader(
+            "tika.host=172.0.0.5\n" +
+                "tika.port=889\n" +
+                "tika.timeoutInMillis=500\n" +
+                "tika.cache.weight.max=1520000"));
+
+        assertThat(TikaConfigurationReader.readTikaConfiguration(configuration))
+            .isEqualTo(
+                TikaConfiguration.builder()
+                    .host("172.0.0.5")
+                    .port(889)
+                    .timeoutInMillis(500)
+                    .cacheWeightInBytes(1520000)
+                    .build());
+    }
+
+
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org