You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by bt...@apache.org on 2018/05/31 01:53:50 UTC
[11/14] james-project git commit: MAILBOX-338 Implement a cache for TikaTextExtractor
MAILBOX-338 Implement a cache for TikaTextExtractor
Project: http://git-wip-us.apache.org/repos/asf/james-project/repo
Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/6d3396ef
Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/6d3396ef
Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/6d3396ef
Branch: refs/heads/master
Commit: 6d3396efb096665c1fae01b13b292d49b3840888
Parents: 918c86f
Author: benwa <bt...@linagora.com>
Authored: Sun May 27 13:26:43 2018 +0700
Committer: benwa <bt...@linagora.com>
Committed: Thu May 31 08:53:16 2018 +0700
----------------------------------------------------------------------
.../mailbox/tika/CachingTextExtractor.java | 84 +++++++++
.../james/mailbox/tika/TikaConfiguration.java | 66 ++++++-
.../mailbox/tika/CachingTextExtractorTest.java | 98 ++++++++++
.../mailbox/TikaConfigurationReader.java | 64 +++++++
.../modules/mailbox/TikaMailboxModule.java | 30 ++--
.../mailbox/TikaConfigurationReaderTest.java | 178 +++++++++++++++++++
6 files changed, 504 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/james-project/blob/6d3396ef/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/CachingTextExtractor.java
----------------------------------------------------------------------
diff --git a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/CachingTextExtractor.java b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/CachingTextExtractor.java
new file mode 100644
index 0000000..8bfed1b
--- /dev/null
+++ b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/CachingTextExtractor.java
@@ -0,0 +1,84 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.mailbox.tika;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.time.Duration;
+import java.util.Optional;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.james.mailbox.extractor.ParsedContent;
+import org.apache.james.mailbox.extractor.TextExtractor;
+
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.Weigher;
+import com.google.common.util.concurrent.UncheckedExecutionException;
+
+
+/**
+ * {@link TextExtractor} decorator that caches the results of an underlying extractor.
+ *
+ * <p>Entries are keyed by the SHA-256 hex digest of the raw input bytes; the content type is not
+ * part of the key. Entries expire after a configurable period without access, and the cache is
+ * bounded by an estimate of the size of the cached textual content.
+ */
+public class CachingTextExtractor implements TextExtractor {
+    private static final long BYTES_PER_UTF16_CODE_UNIT = 2L;
+
+    private final TextExtractor underlying;
+    private final Cache<String, ParsedContent> cache;
+
+    /**
+     * @param underlying extractor invoked when no cached result exists for the input
+     * @param cacheEvictionPeriod period after the last access at which an entry expires
+     * @param cacheWeightInBytes maximum total weight of the cache, see {@link #getSize(ParsedContent)}
+     */
+    public CachingTextExtractor(TextExtractor underlying, Duration cacheEvictionPeriod, Long cacheWeightInBytes) {
+        this.underlying = underlying;
+
+        Weigher<String, ParsedContent> weigher = (key, parsedContent) -> getSize(parsedContent);
+        // CacheBuilder.newBuilder() is not generic: key and value types are inferred from the weigher.
+        this.cache = CacheBuilder.newBuilder()
+            .expireAfterAccess(cacheEvictionPeriod.toMillis(), TimeUnit.MILLISECONDS)
+            .maximumWeight(cacheWeightInBytes)
+            .weigher(weigher)
+            .build();
+    }
+
+    /**
+     * Estimates the weight of a cache entry from its textual content.
+     *
+     * @return two bytes per UTF-16 code unit of the textual content, or 0 when there is none
+     */
+    private int getSize(ParsedContent parsedContent) {
+        return parsedContent.getTextualContent()
+            .map(String::length)
+            .map(this::utf16LengthToBytesCount)
+            .orElse(0);
+    }
+
+    /**
+     * Converts a UTF-16 length into a byte count, saturating at {@link Integer#MAX_VALUE} so that a
+     * very long text cannot overflow into a negative weight.
+     */
+    private int utf16LengthToBytesCount(Integer value) {
+        return (int) Math.min(Integer.MAX_VALUE, BYTES_PER_UTF16_CODE_UNIT * value);
+    }
+
+    /**
+     * Returns the cached result for the given input, delegating to the underlying extractor on a
+     * cache miss. The whole stream is read into memory in order to compute the cache key.
+     *
+     * @throws Exception the exception raised while loading the value, unwrapped from the cache's
+     *     execution exception when its cause is an {@link Exception}
+     */
+    @Override
+    public ParsedContent extractContent(InputStream inputStream, String contentType) throws Exception {
+        byte[] bytes = IOUtils.toByteArray(inputStream);
+        String key = DigestUtils.sha256Hex(bytes);
+        try {
+            return cache.get(key,
+                () -> underlying.extractContent(new ByteArrayInputStream(bytes), contentType));
+        } catch (UncheckedExecutionException | ExecutionException e) {
+            throw unwrap(e);
+        }
+    }
+
+    /**
+     * Returns the cause of the given exception when that cause is itself an {@link Exception},
+     * otherwise the given exception unchanged.
+     */
+    private Exception unwrap(Exception e) {
+        return Optional.ofNullable(e.getCause())
+            .filter(throwable -> throwable instanceof Exception)
+            .map(throwable -> (Exception) throwable)
+            .orElse(e);
+    }
+}
http://git-wip-us.apache.org/repos/asf/james-project/blob/6d3396ef/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java
----------------------------------------------------------------------
diff --git a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java
index 633afc3..e7994b7 100644
--- a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java
+++ b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java
@@ -19,6 +19,8 @@
package org.apache.james.mailbox.tika;
+import java.time.Duration;
+import java.util.Objects;
import java.util.Optional;
import org.apache.james.util.Port;
@@ -36,11 +38,15 @@ public class TikaConfiguration {
private Optional<String> host;
private Optional<Integer> port;
private Optional<Integer> timeoutInMillis;
+ private Optional<Duration> cacheEvictionPeriod;
+ private Optional<Long> cacheWeightInBytes;
private Builder() {
host = Optional.empty();
port = Optional.empty();
timeoutInMillis = Optional.empty();
+ cacheEvictionPeriod = Optional.empty();
+ cacheWeightInBytes = Optional.empty();
}
public Builder host(String host) {
@@ -58,24 +64,52 @@ public class TikaConfiguration {
return this;
}
+        /**
+         * Sets the cache eviction period to an explicit value.
+         *
+         * @param duration the period to use; wrapped with {@code Optional.of}, so it must not be null
+         * @return this builder
+         */
+        public Builder cacheEvictionPeriod(Duration duration) {
+            return cacheEvictionPeriod(Optional.of(duration));
+        }
+
+ /**
+ * Sets the cache eviction period from an optional value.
+ *
+ * @param duration the period to use; when empty, build() falls back to DEFAULT_CACHE_EVICTION_PERIOD
+ * @return this builder
+ */
+ public Builder cacheEvictionPeriod(Optional<Duration> duration) {
+ this.cacheEvictionPeriod = duration;
+ return this;
+ }
+
+        /**
+         * Sets the maximum cache weight, in bytes, to an explicit value.
+         *
+         * @param weight the maximum weight in bytes
+         * @return this builder
+         */
+        public Builder cacheWeightInBytes(long weight) {
+            return cacheWeightInBytes(Optional.of(weight));
+        }
+
+ /**
+ * Sets the maximum cache weight, in bytes, from an optional value.
+ *
+ * @param weight the maximum weight; when empty, build() falls back to DEFAULT_CACHE_LIMIT_100_MB
+ * @return this builder
+ */
+ public Builder cacheWeightInBytes(Optional<Long> weight) {
+ this.cacheWeightInBytes = weight;
+ return this;
+ }
+
public TikaConfiguration build() {
Preconditions.checkState(host.isPresent(), "'host' is mandatory");
Preconditions.checkState(port.isPresent(), "'port' is mandatory");
Preconditions.checkState(timeoutInMillis.isPresent(), "'timeoutInMillis' is mandatory");
Port.assertValid(port.get());
- return new TikaConfiguration(host.get(), port.get(), timeoutInMillis.get());
+ return new TikaConfiguration(host.get(), port.get(), timeoutInMillis.get(),
+ cacheEvictionPeriod.orElse(DEFAULT_CACHE_EVICTION_PERIOD),
+ cacheWeightInBytes.orElse(DEFAULT_CACHE_LIMIT_100_MB));
}
}
+ public static final long DEFAULT_CACHE_LIMIT_100_MB = 1024L * 1024L * 100L;
+ public static final Duration DEFAULT_CACHE_EVICTION_PERIOD = Duration.ofDays(1);
private final String host;
private final int port;
private final int timeoutInMillis;
+ private final Duration cacheEvictionPeriod;
+ private final long cacheWeightInBytes;
- private TikaConfiguration(String host, int port, int timeoutInMillis) {
+ private TikaConfiguration(String host, int port, int timeoutInMillis, Duration cacheEvictionPeriod, long cacheWeightInBytes) {
this.host = host;
this.port = port;
this.timeoutInMillis = timeoutInMillis;
+ this.cacheEvictionPeriod = cacheEvictionPeriod;
+ this.cacheWeightInBytes = cacheWeightInBytes;
}
public String getHost() {
@@ -89,4 +123,32 @@ public class TikaConfiguration {
public int getTimeoutInMillis() {
return timeoutInMillis;
}
+
+ /**
+ * @return the cache eviction period this configuration was built with
+ */
+ public Duration getCacheEvictionPeriod() {
+ return cacheEvictionPeriod;
+ }
+
+ /**
+ * @return the maximum cache weight, in bytes, this configuration was built with
+ */
+ public long getCacheWeightInBytes() {
+ return cacheWeightInBytes;
+ }
+
+    /**
+     * Two configurations are equal when host, port, timeout, cache eviction period and cache
+     * weight all match.
+     */
+    @Override
+    public final boolean equals(Object o) {
+        if (!(o instanceof TikaConfiguration)) {
+            return false;
+        }
+        TikaConfiguration other = (TikaConfiguration) o;
+
+        // Primitive fields are compared directly; reference fields go through Objects.equals.
+        return port == other.port
+            && timeoutInMillis == other.timeoutInMillis
+            && cacheWeightInBytes == other.cacheWeightInBytes
+            && Objects.equals(host, other.host)
+            && Objects.equals(cacheEvictionPeriod, other.cacheEvictionPeriod);
+    }
+
+ /**
+ * Hashes the same five fields that equals(Object) compares.
+ */
+ @Override
+ public final int hashCode() {
+ return Objects.hash(host, port, timeoutInMillis, cacheEvictionPeriod, cacheWeightInBytes);
+ }
+
}
http://git-wip-us.apache.org/repos/asf/james-project/blob/6d3396ef/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/CachingTextExtractorTest.java
----------------------------------------------------------------------
diff --git a/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/CachingTextExtractorTest.java b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/CachingTextExtractorTest.java
new file mode 100644
index 0000000..154a5ff
--- /dev/null
+++ b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/CachingTextExtractorTest.java
@@ -0,0 +1,98 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.mailbox.tika;
+
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.function.Supplier;
+
+import org.apache.james.mailbox.extractor.ParsedContent;
+import org.apache.james.mailbox.extractor.TextExtractor;
+import org.junit.Before;
+import org.junit.Test;
+import org.testcontainers.shaded.com.google.common.collect.ImmutableMap;
+
+/**
+ * Checks that {@link CachingTextExtractor} delegates to the wrapped extractor once per distinct
+ * content and propagates the wrapped extractor's exceptions unchanged.
+ */
+public class CachingTextExtractorTest {
+
+    public static final ParsedContent RESULT = new ParsedContent("content", ImmutableMap.of());
+    public static final Supplier<InputStream> INPUT_STREAM_1 = () -> new ByteArrayInputStream("content1".getBytes(StandardCharsets.UTF_8));
+
+    private static final String CONTENT_TYPE = "application/bytes";
+
+    private TextExtractor textExtractor;
+    private TextExtractor wrappedTextExtractor;
+
+    @Before
+    public void setUp() throws Exception {
+        wrappedTextExtractor = mock(TextExtractor.class);
+        when(wrappedTextExtractor.extractContent(any(), any()))
+            .thenReturn(RESULT);
+
+        textExtractor = new CachingTextExtractor(wrappedTextExtractor,
+            TikaConfiguration.DEFAULT_CACHE_EVICTION_PERIOD,
+            TikaConfiguration.DEFAULT_CACHE_LIMIT_100_MB);
+    }
+
+    @Test
+    public void extractContentShouldCallUnderlyingTextExtractor() throws Exception {
+        extractFirstStream();
+
+        verify(wrappedTextExtractor, times(1)).extractContent(any(), any());
+        verifyNoMoreInteractions(wrappedTextExtractor);
+    }
+
+    @Test
+    public void extractContentShouldAvoidCallingUnderlyingTextExtractorWhenPossible() throws Exception {
+        extractFirstStream();
+        extractFirstStream();
+
+        verify(wrappedTextExtractor, times(1)).extractContent(any(), any());
+        verifyNoMoreInteractions(wrappedTextExtractor);
+    }
+
+    @Test
+    public void extractContentShouldPropagateCheckedException() throws Exception {
+        IOException ioException = new IOException("Any");
+        when(wrappedTextExtractor.extractContent(any(), any()))
+            .thenThrow(ioException);
+
+        assertThatThrownBy(this::extractFirstStream)
+            .isEqualTo(ioException);
+    }
+
+    @Test
+    public void extractContentShouldPropagateRuntimeException() throws Exception {
+        RuntimeException runtimeException = new RuntimeException("Any");
+        when(wrappedTextExtractor.extractContent(any(), any()))
+            .thenThrow(runtimeException);
+
+        assertThatThrownBy(this::extractFirstStream)
+            .isEqualTo(runtimeException);
+    }
+
+    /** Runs the caching extractor on a fresh stream over the same "content1" bytes. */
+    private ParsedContent extractFirstStream() throws Exception {
+        return textExtractor.extractContent(INPUT_STREAM_1.get(), CONTENT_TYPE);
+    }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/james-project/blob/6d3396ef/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaConfigurationReader.java
----------------------------------------------------------------------
diff --git a/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaConfigurationReader.java b/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaConfigurationReader.java
new file mode 100644
index 0000000..13ca0cd
--- /dev/null
+++ b/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaConfigurationReader.java
@@ -0,0 +1,64 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.modules.mailbox;
+
+import java.time.Duration;
+import java.util.Optional;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.configuration.PropertiesConfiguration;
+import org.apache.james.mailbox.tika.TikaConfiguration;
+import org.apache.james.util.Size;
+import org.apache.james.util.TimeConverter;
+
+import com.github.fge.lambdas.Throwing;
+import com.google.common.primitives.Ints;
+
+/**
+ * Builds a {@link TikaConfiguration} from a properties file, applying defaults for the host, port
+ * and timeout and leaving the cache settings empty when they are not configured.
+ */
+public class TikaConfigurationReader {
+    public static final String TIKA_HOST = "tika.host";
+    public static final String TIKA_PORT = "tika.port";
+    public static final String TIKA_TIMEOUT_IN_MS = "tika.timeoutInMillis";
+    public static final String DEFAULT_HOST = "127.0.0.1";
+    public static final int DEFAULT_PORT = 9998;
+    public static final String TIKA_CACHE_EVICTION_PERIOD = "tika.cache.eviction.period";
+    public static final String TIKA_CACHE_WEIGHT_MAX = "tika.cache.weight.max";
+    public static final int DEFAULT_TIMEOUT_IN_MS = Ints.checkedCast(TimeUnit.SECONDS.toMillis(30));
+
+    /**
+     * Reads the Tika settings from the given configuration.
+     *
+     * @param configuration properties holding the {@code tika.*} keys
+     * @return the resulting configuration
+     */
+    public static TikaConfiguration readTikaConfiguration(PropertiesConfiguration configuration) {
+        Optional<Duration> evictionPeriod = readCacheEvictionPeriod(configuration);
+        Optional<Long> maxWeight = readCacheWeight(configuration);
+
+        return TikaConfiguration.builder()
+            .host(configuration.getString(TIKA_HOST, DEFAULT_HOST))
+            .port(configuration.getInt(TIKA_PORT, DEFAULT_PORT))
+            .timeoutInMillis(configuration.getInt(TIKA_TIMEOUT_IN_MS, DEFAULT_TIMEOUT_IN_MS))
+            .cacheEvictionPeriod(evictionPeriod)
+            .cacheWeightInBytes(maxWeight)
+            .build();
+    }
+
+    /** Parses the eviction period via TimeConverter, passing seconds as the unit argument; empty when the key is absent. */
+    private static Optional<Duration> readCacheEvictionPeriod(PropertiesConfiguration configuration) {
+        String rawPeriod = configuration.getString(TIKA_CACHE_EVICTION_PERIOD, null);
+        return Optional.ofNullable(rawPeriod)
+            .map(value -> Duration.ofMillis(TimeConverter.getMilliSeconds(value, TimeConverter.Unit.SECONDS)));
+    }
+
+    /** Parses the maximum cache weight via Size and converts it to bytes; empty when the key is absent. */
+    private static Optional<Long> readCacheWeight(PropertiesConfiguration configuration) {
+        String rawWeight = configuration.getString(TIKA_CACHE_WEIGHT_MAX, null);
+        return Optional.ofNullable(rawWeight)
+            .map(Throwing.function(Size::parse))
+            .map(Size::asBytes);
+    }
+}
http://git-wip-us.apache.org/repos/asf/james-project/blob/6d3396ef/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaMailboxModule.java
----------------------------------------------------------------------
diff --git a/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaMailboxModule.java b/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaMailboxModule.java
index c9cc6b8..6fcd505 100644
--- a/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaMailboxModule.java
+++ b/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaMailboxModule.java
@@ -19,15 +19,19 @@
package org.apache.james.modules.mailbox;
+import static org.apache.james.modules.mailbox.TikaConfigurationReader.DEFAULT_HOST;
+import static org.apache.james.modules.mailbox.TikaConfigurationReader.DEFAULT_PORT;
+import static org.apache.james.modules.mailbox.TikaConfigurationReader.DEFAULT_TIMEOUT_IN_MS;
+
import java.io.FileNotFoundException;
import java.net.URISyntaxException;
-import java.util.concurrent.TimeUnit;
import javax.inject.Singleton;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.james.mailbox.extractor.TextExtractor;
+import org.apache.james.mailbox.tika.CachingTextExtractor;
import org.apache.james.mailbox.tika.TikaConfiguration;
import org.apache.james.mailbox.tika.TikaHttpClient;
import org.apache.james.mailbox.tika.TikaHttpClientImpl;
@@ -36,7 +40,6 @@ import org.apache.james.utils.PropertiesProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.primitives.Ints;
import com.google.inject.AbstractModule;
import com.google.inject.Provides;
import com.google.inject.Scopes;
@@ -46,18 +49,11 @@ public class TikaMailboxModule extends AbstractModule {
private static final Logger LOGGER = LoggerFactory.getLogger(TikaMailboxModule.class);
private static final String TIKA_CONFIGURATION_NAME = "tika";
- private static final String TIKA_HOST = "tika.host";
- private static final String TIKA_PORT = "tika.port";
- private static final String TIKA_TIMEOUT_IN_MS = "tika.timeoutInMillis";
- private static final String DEFAULT_HOST = "127.0.0.1";
- private static final int DEFAULT_PORT = 9998;
- private static final int DEFAULT_TIMEOUT_IN_MS = Ints.checkedCast(TimeUnit.SECONDS.toMillis(30));
@Override
protected void configure() {
bind(TikaTextExtractor.class).in(Scopes.SINGLETON);
- bind(TextExtractor.class).to(TikaTextExtractor.class);
}
@Provides
@@ -71,11 +67,8 @@ public class TikaMailboxModule extends AbstractModule {
private TikaConfiguration getTikaConfiguration(PropertiesProvider propertiesProvider) throws ConfigurationException {
try {
PropertiesConfiguration configuration = propertiesProvider.getConfiguration(TIKA_CONFIGURATION_NAME);
- return TikaConfiguration.builder()
- .host(configuration.getString(TIKA_HOST, DEFAULT_HOST))
- .port(configuration.getInt(TIKA_PORT, DEFAULT_PORT))
- .timeoutInMillis(configuration.getInt(TIKA_TIMEOUT_IN_MS, DEFAULT_TIMEOUT_IN_MS))
- .build();
+
+ return TikaConfigurationReader.readTikaConfiguration(configuration);
} catch (FileNotFoundException e) {
LOGGER.warn("Could not find {} configuration file. Using {}:{} as contact point", TIKA_CONFIGURATION_NAME, DEFAULT_HOST, DEFAULT_PORT);
return TikaConfiguration.builder()
@@ -86,4 +79,13 @@ public class TikaMailboxModule extends AbstractModule {
}
}
+    /**
+     * Provides the application-wide {@link TextExtractor}: the Tika extractor wrapped in a cache
+     * configured with the eviction period and weight limit of the Tika configuration.
+     */
+    @Provides
+    @Singleton
+    private TextExtractor provideTextExtractor(TikaTextExtractor textExtractor, TikaConfiguration configuration) {
+        TextExtractor cachingExtractor = new CachingTextExtractor(
+            textExtractor,
+            configuration.getCacheEvictionPeriod(),
+            configuration.getCacheWeightInBytes());
+        return cachingExtractor;
+    }
+
}
http://git-wip-us.apache.org/repos/asf/james-project/blob/6d3396ef/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/mailbox/TikaConfigurationReaderTest.java
----------------------------------------------------------------------
diff --git a/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/mailbox/TikaConfigurationReaderTest.java b/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/mailbox/TikaConfigurationReaderTest.java
new file mode 100644
index 0000000..484c8f0
--- /dev/null
+++ b/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/mailbox/TikaConfigurationReaderTest.java
@@ -0,0 +1,178 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.modules.mailbox;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.io.StringReader;
+import java.time.Duration;
+
+import org.apache.commons.configuration.PropertiesConfiguration;
+import org.apache.james.mailbox.tika.TikaConfiguration;
+import org.junit.Test;
+
+/**
+ * Checks how {@link TikaConfigurationReader} maps {@code tika.*} properties onto a
+ * {@link TikaConfiguration}, including defaults and unit parsing of the cache settings.
+ */
+public class TikaConfigurationReaderTest {
+
+    @Test
+    public void readTikaConfigurationShouldAcceptMandatoryValues() throws Exception {
+        PropertiesConfiguration configuration = newConfiguration(
+            "tika.host=172.0.0.5\n" +
+            "tika.port=889\n" +
+            "tika.timeoutInMillis=500\n");
+
+        assertThat(TikaConfigurationReader.readTikaConfiguration(configuration))
+            .isEqualTo(
+                TikaConfiguration.builder()
+                    .host("172.0.0.5")
+                    .port(889)
+                    .timeoutInMillis(500)
+                    .cacheWeightInBytes(100L * 1024L * 1024L)
+                    .cacheEvictionPeriod(Duration.ofDays(1))
+                    .build());
+    }
+
+    @Test
+    public void readTikaConfigurationShouldReturnDefaultOnMissingHost() throws Exception {
+        PropertiesConfiguration configuration = newConfiguration(
+            "tika.port=889\n" +
+            "tika.timeoutInMillis=500\n");
+
+        assertThat(TikaConfigurationReader.readTikaConfiguration(configuration))
+            .isEqualTo(
+                TikaConfiguration.builder()
+                    .host("127.0.0.1")
+                    .port(889)
+                    .timeoutInMillis(500)
+                    .build());
+    }
+
+    @Test
+    public void readTikaConfigurationShouldReturnDefaultOnMissingPort() throws Exception {
+        PropertiesConfiguration configuration = newConfiguration(
+            "tika.host=172.0.0.5\n" +
+            "tika.timeoutInMillis=500\n");
+
+        assertThat(TikaConfigurationReader.readTikaConfiguration(configuration))
+            .isEqualTo(
+                TikaConfiguration.builder()
+                    .host("172.0.0.5")
+                    .port(9998)
+                    .timeoutInMillis(500)
+                    .build());
+    }
+
+    @Test
+    public void readTikaConfigurationShouldReturnDefaultOnMissingTimeout() throws Exception {
+        PropertiesConfiguration configuration = newConfiguration(
+            "tika.host=172.0.0.5\n" +
+            "tika.port=889\n");
+
+        assertThat(TikaConfigurationReader.readTikaConfiguration(configuration))
+            .isEqualTo(
+                TikaConfiguration.builder()
+                    .host("172.0.0.5")
+                    .port(889)
+                    .timeoutInMillis(30 * 1000)
+                    .build());
+    }
+
+    @Test
+    public void readTikaConfigurationShouldParseUnitForCacheEvictionPeriod() throws Exception {
+        PropertiesConfiguration configuration = newConfiguration(
+            "tika.host=172.0.0.5\n" +
+            "tika.port=889\n" +
+            "tika.timeoutInMillis=500\n" +
+            "tika.cache.eviction.period=2H");
+
+        assertThat(TikaConfigurationReader.readTikaConfiguration(configuration))
+            .isEqualTo(
+                TikaConfiguration.builder()
+                    .host("172.0.0.5")
+                    .port(889)
+                    .timeoutInMillis(500)
+                    .cacheEvictionPeriod(Duration.ofHours(2))
+                    .build());
+    }
+
+    @Test
+    public void readTikaConfigurationShouldDefaultToSecondWhenMissingUnitForCacheEvictionPeriod() throws Exception {
+        PropertiesConfiguration configuration = newConfiguration(
+            "tika.host=172.0.0.5\n" +
+            "tika.port=889\n" +
+            "tika.timeoutInMillis=500\n" +
+            "tika.cache.eviction.period=3600");
+
+        assertThat(TikaConfigurationReader.readTikaConfiguration(configuration))
+            .isEqualTo(
+                TikaConfiguration.builder()
+                    .host("172.0.0.5")
+                    .port(889)
+                    .timeoutInMillis(500)
+                    .cacheEvictionPeriod(Duration.ofHours(1))
+                    .build());
+    }
+
+    @Test
+    public void readTikaConfigurationShouldParseUnitForCacheWeightMax() throws Exception {
+        PropertiesConfiguration configuration = newConfiguration(
+            "tika.host=172.0.0.5\n" +
+            "tika.port=889\n" +
+            "tika.timeoutInMillis=500\n" +
+            "tika.cache.weight.max=200M");
+
+        assertThat(TikaConfigurationReader.readTikaConfiguration(configuration))
+            .isEqualTo(
+                TikaConfiguration.builder()
+                    .host("172.0.0.5")
+                    .port(889)
+                    .timeoutInMillis(500)
+                    .cacheWeightInBytes(200L * 1024L * 1024L)
+                    .build());
+    }
+
+    @Test
+    public void readTikaConfigurationShouldDefaultToByteAsSizeUnit() throws Exception {
+        PropertiesConfiguration configuration = newConfiguration(
+            "tika.host=172.0.0.5\n" +
+            "tika.port=889\n" +
+            "tika.timeoutInMillis=500\n" +
+            "tika.cache.weight.max=1520000");
+
+        assertThat(TikaConfigurationReader.readTikaConfiguration(configuration))
+            .isEqualTo(
+                TikaConfiguration.builder()
+                    .host("172.0.0.5")
+                    .port(889)
+                    .timeoutInMillis(500)
+                    .cacheWeightInBytes(1520000)
+                    .build());
+    }
+
+    /** Loads the given properties-file content into a fresh configuration. */
+    private static PropertiesConfiguration newConfiguration(String properties) throws Exception {
+        PropertiesConfiguration configuration = new PropertiesConfiguration();
+        configuration.load(new StringReader(properties));
+        return configuration;
+    }
+
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org