You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by bt...@apache.org on 2015/06/29 10:45:02 UTC
svn commit: r1688145 - in /james/mailbox/trunk/elasticsearch: ./
src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/
src/test/java/org/apache/james/mailbox/elasticsearch/json/extractor/
src/test/resources/documents/
Author: btellier
Date: Mon Jun 29 08:45:01 2015
New Revision: 1688145
URL: http://svn.apache.org/r1688145
Log:
MAILBOX-245 Adding text extractors
Added:
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/DefaultTextExtractor.java
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/ParsedContent.java
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/TextExtractor.java
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/TikaTextExtractor.java
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/extractor/
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/extractor/DefaultTextExtractorTest.java
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/extractor/TikaTextExtractorTest.java
james/mailbox/trunk/elasticsearch/src/test/resources/documents/PDF.pdf
james/mailbox/trunk/elasticsearch/src/test/resources/documents/Text.txt
james/mailbox/trunk/elasticsearch/src/test/resources/documents/calc.ods
james/mailbox/trunk/elasticsearch/src/test/resources/documents/calc.xlsx
james/mailbox/trunk/elasticsearch/src/test/resources/documents/fake.txt
james/mailbox/trunk/elasticsearch/src/test/resources/documents/slides.odp
james/mailbox/trunk/elasticsearch/src/test/resources/documents/slides.pptx
james/mailbox/trunk/elasticsearch/src/test/resources/documents/writter.docx
james/mailbox/trunk/elasticsearch/src/test/resources/documents/writter.odt
Modified:
james/mailbox/trunk/elasticsearch/pom.xml
Modified: james/mailbox/trunk/elasticsearch/pom.xml
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/pom.xml?rev=1688145&r1=1688144&r2=1688145&view=diff
==============================================================================
--- james/mailbox/trunk/elasticsearch/pom.xml (original)
+++ james/mailbox/trunk/elasticsearch/pom.xml Mon Jun 29 08:45:01 2015
@@ -103,6 +103,16 @@
<scope>test</scope>
</dependency>
<dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>1.7</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parsers</artifactId>
+ <version>1.7</version>
+ </dependency>
+ <dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<version>3.0.0</version>
Added: james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/DefaultTextExtractor.java
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/DefaultTextExtractor.java?rev=1688145&view=auto
==============================================================================
--- james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/DefaultTextExtractor.java (added)
+++ james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/DefaultTextExtractor.java Mon Jun 29 08:45:01 2015
@@ -0,0 +1,44 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.mailbox.elasticsearch.json.extractor;
+
+import java.io.InputStream;
+import java.util.Optional;
+
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ImmutableMultimap;
+import org.apache.commons.io.IOUtils;
+
+/**
+ * A default text extractor that is directly based on the input file provided.
+ *
+ * Costs less calculations that TikaTextExtractor, but result is not that good.
+ */
+public class DefaultTextExtractor implements TextExtractor {
+
+ @Override
+ public ParsedContent extractContent(InputStream inputStream, Optional<String> contentType, Optional<String> fileName) throws Exception {
+ if(contentType.isPresent() && contentType.get().startsWith("text/") ) {
+ return new ParsedContent(Optional.of(IOUtils.toString(inputStream)), ImmutableMultimap.copyOf(ArrayListMultimap.create()));
+ } else {
+ return new ParsedContent(Optional.empty(), ImmutableMultimap.of());
+ }
+ }
+}
Added: james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/ParsedContent.java
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/ParsedContent.java?rev=1688145&view=auto
==============================================================================
--- james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/ParsedContent.java (added)
+++ james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/ParsedContent.java Mon Jun 29 08:45:01 2015
@@ -0,0 +1,58 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.mailbox.elasticsearch.json.extractor;
+
+import java.util.Objects;
+import java.util.Optional;
+
+import com.google.common.collect.ImmutableMultimap;
+import com.google.common.collect.Multimap;
+
+/** Immutable result of a text extraction: the optional textual content plus the associated metadata. */
+public class ParsedContent {
+    private final Optional<String> textualContent;
+    private final ImmutableMultimap<String, String> metadata;
+
+    /** Keeps the given text reference and takes an immutable copy of {@code metadata}. */
+    public ParsedContent(Optional<String> textualContent, Multimap<String, String> metadata) {
+        this.textualContent = textualContent;
+        this.metadata = ImmutableMultimap.copyOf(metadata);
+    }
+
+    public Optional<String> getTextualContent() {
+        return this.textualContent;
+    }
+
+    public Multimap<String, String> getMetadata() {
+        return this.metadata;
+    }
+
+    @Override public boolean equals(Object o) {
+        if (!(o instanceof ParsedContent)) {
+            return false;
+        }
+        ParsedContent that = (ParsedContent) o;
+        return Objects.equals(textualContent, that.textualContent) && Objects.equals(metadata, that.metadata);
+    }
+
+    @Override public int hashCode() {
+        return Objects.hash(textualContent, metadata);
+    }
+}
Added: james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/TextExtractor.java
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/TextExtractor.java?rev=1688145&view=auto
==============================================================================
--- james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/TextExtractor.java (added)
+++ james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/TextExtractor.java Mon Jun 29 08:45:01 2015
@@ -0,0 +1,29 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.mailbox.elasticsearch.json.extractor;
+
+import java.io.InputStream;
+import java.util.Optional;
+
+public interface TextExtractor {
+ /** Produces the ParsedContent for the document read from {@code inputStream}; {@code contentType} and {@code fileName} may each be absent. */
+ ParsedContent extractContent(InputStream inputStream, Optional<String> contentType, Optional<String> fileName) throws Exception;
+
+}
Added: james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/TikaTextExtractor.java
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/TikaTextExtractor.java?rev=1688145&view=auto
==============================================================================
--- james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/TikaTextExtractor.java (added)
+++ james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/TikaTextExtractor.java Mon Jun 29 08:45:01 2015
@@ -0,0 +1,88 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.mailbox.elasticsearch.json.extractor;
+
+import java.io.InputStream;
+import java.io.StringWriter;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
+
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Multimap;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+
+/**
+ * Text extractor backed by Apache Tika's {@code AutoDetectParser}.
+ *
+ * Returns the parsed body text together with every metadata entry left in the Tika {@code Metadata} object.
+ */
+public class TikaTextExtractor implements TextExtractor {
+
+    private final Parser parser;
+
+    /** Creates an extractor that delegates to a new {@code AutoDetectParser}. */
+    public TikaTextExtractor() {
+        parser = new AutoDetectParser();
+    }
+
+    /**
+     * Parses the stream with Tika and returns its body text and metadata.
+     *
+     * @param inputStream document to parse
+     * @param contentType declared content type, set as {@code Metadata.CONTENT_TYPE} when present
+     * @param fileName declared file name, set as {@code Metadata.RESOURCE_NAME_KEY} when present
+     * @return the extracted text (always present) and a copy of the resulting metadata
+     * @throws Exception if the underlying parser fails
+     */
+    @Override
+    public ParsedContent extractContent(InputStream inputStream, Optional<String> contentType, Optional<String> fileName) throws Exception {
+        Metadata metadata = new Metadata();
+        fileName.ifPresent(name -> metadata.set(Metadata.RESOURCE_NAME_KEY, name));
+        contentType.ifPresent(type -> metadata.set(Metadata.CONTENT_TYPE, type));
+
+        StringWriter bodyText = new StringWriter();
+        // BodyContentHandler forwards the character content of the document body to the writer.
+        BodyContentHandler bodyContentHandler = new BodyContentHandler(bodyText);
+        parser.parse(inputStream, bodyContentHandler, metadata, new ParseContext());
+
+        return new ParsedContent(Optional.of(bodyText.toString()), convertMetadataToMultimap(metadata));
+    }
+
+    /**
+     * Copies every metadata name and all of its values into a multimap.
+     *
+     * Uses a plain loop: Stream.reduce must not mutate its identity value, so it is
+     * unsuitable for filling a mutable multimap.
+     */
+    private Multimap<String, String> convertMetadataToMultimap(Metadata metadata) {
+        Multimap<String, String> metadataMultimap = ArrayListMultimap.create();
+        for (String name : metadata.names()) {
+            metadataMultimap.putAll(name, Arrays.asList(metadata.getValues(name)));
+        }
+        return metadataMultimap;
+    }
+
+}
Added: james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/extractor/DefaultTextExtractorTest.java
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/extractor/DefaultTextExtractorTest.java?rev=1688145&view=auto
==============================================================================
--- james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/extractor/DefaultTextExtractorTest.java (added)
+++ james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/extractor/DefaultTextExtractorTest.java Mon Jun 29 08:45:01 2015
@@ -0,0 +1,58 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.mailbox.elasticsearch.json.extractor;
+
+import java.io.InputStream;
+import java.util.Optional;
+
+import org.junit.Before;
+import org.junit.Test;
+import static org.assertj.core.api.Assertions.assertThat;
+
+public class DefaultTextExtractorTest {
+    private TextExtractor textExtractor;
+
+    @Before
+    public void setUp() {
+        textExtractor = new DefaultTextExtractor();
+    }
+
+    /** A text/plain resource is expected back as its exact text, trailing blank line included. */
+    @Test
+    public void textTest() throws Exception {
+        InputStream document = ClassLoader.getSystemResourceAsStream("documents/Text.txt");
+        assertThat(document).isNotNull();
+        ParsedContent parsed = textExtractor.extractContent(document, Optional.of("text/plain"), Optional.of("Text.txt"));
+        String text = parsed.getTextualContent().get();
+        assertThat(text).isEqualTo("This is some awesome text text.\n\n");
+    }
+
+    /** A non-text content type (here a .docx) is expected to yield no textual content. */
+    @Test
+    public void textMicrosoftWorldTest() throws Exception {
+        InputStream document = ClassLoader.getSystemResourceAsStream("documents/writter.docx");
+        assertThat(document).isNotNull();
+        Optional<String> declaredType =
+            Optional.of("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
+        ParsedContent parsed = textExtractor.extractContent(document, declaredType, Optional.of("writter.docx"));
+        Optional<String> text = parsed.getTextualContent();
+        assertThat(text).isEmpty();
+    }
+}
Added: james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/extractor/TikaTextExtractorTest.java
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/extractor/TikaTextExtractorTest.java?rev=1688145&view=auto
==============================================================================
--- james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/extractor/TikaTextExtractorTest.java (added)
+++ james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/extractor/TikaTextExtractorTest.java Mon Jun 29 08:45:01 2015
@@ -0,0 +1,179 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.mailbox.elasticsearch.json.extractor;
+
+import java.io.InputStream;
+import java.util.Optional;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import org.junit.Before;
+import org.junit.Test;
+
+public class TikaTextExtractorTest {
+ // Each test parses a bundled sample document and pins the exact text returned by TikaTextExtractor.
+ private TextExtractor textExtractor;
+
+ @Before
+ public void setUp() {
+ textExtractor = new TikaTextExtractor();
+ }
+ // Plain text: the expected output is the file's text plus one extra trailing newline.
+ @Test
+ public void textTest() throws Exception {
+ InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/Text.txt");
+ assertThat(inputStream).isNotNull();
+ assertThat(
+ textExtractor.extractContent(
+ inputStream,
+ Optional.of("text/plain"),
+ Optional.of("Text.txt"))
+ .getTextualContent()
+ .get())
+ .isEqualTo("This is some awesome text text.\n\n\n");
+ }
+ // .docx (Office Open XML word-processing document).
+ @Test
+ public void textMicrosoftWorldTest() throws Exception {
+ InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/writter.docx");
+ assertThat(inputStream).isNotNull();
+ assertThat(
+ textExtractor.extractContent(
+ inputStream,
+ Optional.of("application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
+ Optional.of("writter.docx"))
+ .getTextualContent()
+ .get())
+ .isEqualTo("This is an awesome document on libroffice writter !\n");
+ }
+ // .odt (OpenDocument text).
+ @Test
+ public void textOdtTest() throws Exception {
+ InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/writter.odt");
+ assertThat(inputStream).isNotNull();
+ assertThat(
+ textExtractor.extractContent(
+ inputStream,
+ Optional.of("application/vnd.oasis.opendocument.text"),
+ Optional.of("writter.odt"))
+ .getTextualContent()
+ .get())
+ .isEqualTo("This is an awesome document on libroffice writter !\n");
+ }
+ // The resource fake.txt is declared as an .odt; expected text equals the .odt test's — presumably fake.txt holds ODT data (TODO confirm).
+ @Test
+ public void documentWithBadDeclaredMetadataShouldBeWellHandled() throws Exception {
+ InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/fake.txt");
+ assertThat(inputStream).isNotNull();
+ assertThat(
+ textExtractor.extractContent(
+ inputStream,
+ Optional.of("application/vnd.oasis.opendocument.text"),
+ Optional.of("writter.odt"))
+ .getTextualContent()
+ .get())
+ .isEqualTo("This is an awesome document on libroffice writter !\n");
+ }
+ // .pptx presentation.
+ @Test
+ public void slidePowerPointTest() throws Exception {
+ InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/slides.pptx");
+ assertThat(inputStream).isNotNull();
+ assertThat(
+ textExtractor.extractContent(
+ inputStream,
+ Optional.of("application/vnd.openxmlformats-officedocument.presentationml.presentation"),
+ Optional.of("slides.pptx"))
+ .getTextualContent()
+ .get())
+ .isEqualTo("James is awesome\nIt manages attachments so well !\n");
+ }
+ // .odp presentation; the expected text ends with a long run of newlines.
+ @Test
+ public void slideOdpTest() throws Exception {
+ InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/slides.odp");
+ assertThat(inputStream).isNotNull();
+ assertThat(
+ textExtractor.extractContent(
+ inputStream,
+ Optional.of("application/vnd.oasis.opendocument.presentation"),
+ Optional.of("slides.odp"))
+ .getTextualContent()
+ .get())
+ .isEqualTo("James is awesome\n\nIt manages attachments so well !\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n");
+ }
+ // PDF document.
+ @Test
+ public void pdfTest() throws Exception {
+ InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/PDF.pdf");
+ assertThat(inputStream).isNotNull();
+ assertThat(
+ textExtractor.extractContent(
+ inputStream,
+ Optional.of("application/pdf"),
+ Optional.of("PDF.pdf"))
+ .getTextualContent()
+ .get())
+ .isEqualTo("\nThis is an awesome document on libroffice writter !\n\n\n");
+ }
+ // .ods spreadsheet; the expected text also has "???" and "Page" lines — presumably header/footer placeholders (TODO confirm).
+ @Test
+ public void odsTest() throws Exception {
+ InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/calc.ods");
+ assertThat(inputStream).isNotNull();
+ assertThat(
+ textExtractor.extractContent(
+ inputStream,
+ Optional.of("application/vnd.oasis.opendocument.spreadsheet"),
+ Optional.of("calc.ods"))
+ .getTextualContent()
+ .get())
+ .isEqualTo("\tThis is an aesome LibreOffice document !\n" +
+ "\n" +
+ "\n" +
+ "???\n" +
+ "Page \n" +
+ "??? (???)\n" +
+ "00/00/0000, 00:00:00\n" +
+ "Page / \n");
+ }
+ // .xlsx spreadsheet; the expected text starts with "Feuille1" — presumably the sheet name (TODO confirm).
+ @Test
+ public void excelTest() throws Exception {
+ InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/calc.xlsx");
+ assertThat(inputStream).isNotNull();
+ assertThat(
+ textExtractor.extractContent(
+ inputStream,
+ Optional.of("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
+ Optional.of("calc.xlsx"))
+ .getTextualContent()
+ .get())
+ .isEqualTo("Feuille1\n" +
+ "\tThis is an aesome LibreOffice document !\n" +
+ "\n" +
+ "&A\t\n" +
+ "\n" +
+ "Page &P\t\n" +
+ "\n" +
+ "\n");
+ }
+
+}
Added: james/mailbox/trunk/elasticsearch/src/test/resources/documents/PDF.pdf
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/PDF.pdf?rev=1688145&view=auto
==============================================================================
(empty)
Added: james/mailbox/trunk/elasticsearch/src/test/resources/documents/Text.txt
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/Text.txt?rev=1688145&view=auto
==============================================================================
--- james/mailbox/trunk/elasticsearch/src/test/resources/documents/Text.txt (added)
+++ james/mailbox/trunk/elasticsearch/src/test/resources/documents/Text.txt Mon Jun 29 08:45:01 2015
@@ -0,0 +1,2 @@
+This is some awesome text text.
+
Added: james/mailbox/trunk/elasticsearch/src/test/resources/documents/calc.ods
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/calc.ods?rev=1688145&view=auto
==============================================================================
(empty)
Added: james/mailbox/trunk/elasticsearch/src/test/resources/documents/calc.xlsx
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/calc.xlsx?rev=1688145&view=auto
==============================================================================
(empty)
Added: james/mailbox/trunk/elasticsearch/src/test/resources/documents/fake.txt
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/fake.txt?rev=1688145&view=auto
==============================================================================
(empty)
Added: james/mailbox/trunk/elasticsearch/src/test/resources/documents/slides.odp
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/slides.odp?rev=1688145&view=auto
==============================================================================
(empty)
Added: james/mailbox/trunk/elasticsearch/src/test/resources/documents/slides.pptx
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/slides.pptx?rev=1688145&view=auto
==============================================================================
(empty)
Added: james/mailbox/trunk/elasticsearch/src/test/resources/documents/writter.docx
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/writter.docx?rev=1688145&view=auto
==============================================================================
(empty)
Added: james/mailbox/trunk/elasticsearch/src/test/resources/documents/writter.odt
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/writter.odt?rev=1688145&view=auto
==============================================================================
(empty)
---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org