You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by ma...@apache.org on 2016/07/27 09:00:00 UTC

james-project git commit: JAMES-1811 use jsoup for html to text conversion

Repository: james-project
Updated Branches:
  refs/heads/master 7c89479a4 -> fbc09db2f


JAMES-1811 use jsoup for html to text conversion


Project: http://git-wip-us.apache.org/repos/asf/james-project/repo
Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/fbc09db2
Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/fbc09db2
Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/fbc09db2

Branch: refs/heads/master
Commit: fbc09db2f877d32b2f6d57e96ae06c96668f0842
Parents: 7c89479
Author: Matthieu Baechler <ma...@linagora.com>
Authored: Tue Jul 26 16:25:39 2016 +0200
Committer: Matthieu Baechler <ma...@linagora.com>
Committed: Wed Jul 27 10:59:35 2016 +0200

----------------------------------------------------------------------
 mailbox/memory/pom.xml                          |  5 ++
 .../mailbox/inmemory/JsoupTextExtractor.java    | 50 ++++++++++++++++++++
 server/container/guice/memory-guice/pom.xml     |  4 --
 .../modules/data/MemoryDataJmapModule.java      |  7 +--
 .../modules/mailbox/MemoryMailboxModule.java    |  4 +-
 .../integration/SetMessagesMethodTest.java      |  3 +-
 .../cucumber/GetMessagesMethodStepdefs.java     |  3 +-
 7 files changed, 65 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/james-project/blob/fbc09db2/mailbox/memory/pom.xml
----------------------------------------------------------------------
diff --git a/mailbox/memory/pom.xml b/mailbox/memory/pom.xml
index 0667a91..a164e25 100644
--- a/mailbox/memory/pom.xml
+++ b/mailbox/memory/pom.xml
@@ -81,6 +81,11 @@
             <scope>test</scope>
         </dependency>
         <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.9.2</version>
+        </dependency>
+        <dependency>
             <groupId>org.xenei</groupId>
             <artifactId>junit-contracts</artifactId>
             <scope>test</scope>

http://git-wip-us.apache.org/repos/asf/james-project/blob/fbc09db2/mailbox/memory/src/main/java/org/apache/james/mailbox/inmemory/JsoupTextExtractor.java
----------------------------------------------------------------------
diff --git a/mailbox/memory/src/main/java/org/apache/james/mailbox/inmemory/JsoupTextExtractor.java b/mailbox/memory/src/main/java/org/apache/james/mailbox/inmemory/JsoupTextExtractor.java
new file mode 100644
index 0000000..6f2de77
--- /dev/null
+++ b/mailbox/memory/src/main/java/org/apache/james/mailbox/inmemory/JsoupTextExtractor.java
@@ -0,0 +1,50 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.mailbox.inmemory;
+
+import java.io.InputStream;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.james.mailbox.store.extractor.ParsedContent;
+import org.apache.james.mailbox.store.extractor.TextExtractor;
+import org.jsoup.Jsoup;
+
+import com.google.common.collect.Maps;
+
+
+/** In-memory {@link TextExtractor}: handles {@code text/plain} and {@code text/html} (via Jsoup); other types yield null text. */
+public class JsoupTextExtractor implements TextExtractor {
+    /** Decodes the stream as UTF-8 (not the platform default charset); {@code fileName} is unused; metadata is always empty. */
+    @Override
+    public ParsedContent extractContent(InputStream inputStream, String contentType, String fileName) throws Exception {
+        Map<String, List<String>> emptyMetadata = Maps.newHashMap();
+        if ("text/plain".equals(contentType)) {
+            return new ParsedContent(IOUtils.toString(inputStream, "UTF-8"), emptyMetadata);
+        }
+        if ("text/html".equals(contentType)) {
+            String text = Jsoup.parse(IOUtils.toString(inputStream, "UTF-8")).text();
+            return new ParsedContent(text, emptyMetadata);
+        }
+        // Null or unsupported content type: no textual content, only the empty metadata map.
+        return new ParsedContent(null, emptyMetadata);
+    }
+}

http://git-wip-us.apache.org/repos/asf/james-project/blob/fbc09db2/server/container/guice/memory-guice/pom.xml
----------------------------------------------------------------------
diff --git a/server/container/guice/memory-guice/pom.xml b/server/container/guice/memory-guice/pom.xml
index 53124a1..09ba08a 100644
--- a/server/container/guice/memory-guice/pom.xml
+++ b/server/container/guice/memory-guice/pom.xml
@@ -203,10 +203,6 @@
                 </dependency>
                 <dependency>
                     <groupId>${project.groupId}</groupId>
-                    <artifactId>apache-james-mailbox-tika</artifactId>
-                </dependency>
-                <dependency>
-                    <groupId>${project.groupId}</groupId>
                     <artifactId>james-server-data-memory</artifactId>
                 </dependency>
                 <dependency>

http://git-wip-us.apache.org/repos/asf/james-project/blob/fbc09db2/server/container/guice/memory-guice/src/main/java/org/apache/james/modules/data/MemoryDataJmapModule.java
----------------------------------------------------------------------
diff --git a/server/container/guice/memory-guice/src/main/java/org/apache/james/modules/data/MemoryDataJmapModule.java b/server/container/guice/memory-guice/src/main/java/org/apache/james/modules/data/MemoryDataJmapModule.java
index 02924b1..7eef012 100644
--- a/server/container/guice/memory-guice/src/main/java/org/apache/james/modules/data/MemoryDataJmapModule.java
+++ b/server/container/guice/memory-guice/src/main/java/org/apache/james/modules/data/MemoryDataJmapModule.java
@@ -25,8 +25,9 @@ import org.apache.james.jmap.api.vacation.VacationRepository;
 import org.apache.james.jmap.memory.access.MemoryAccessTokenRepository;
 import org.apache.james.jmap.memory.vacation.MemoryNotificationRegistry;
 import org.apache.james.jmap.memory.vacation.MemoryVacationRepository;
+import org.apache.james.mailbox.inmemory.JsoupTextExtractor;
+import org.apache.james.mailbox.store.extractor.DefaultTextExtractor;
 import org.apache.james.mailbox.store.extractor.TextExtractor;
-import org.apache.james.mailbox.tika.extractor.TikaTextExtractor;
 
 import com.google.inject.AbstractModule;
 import com.google.inject.Scopes;
@@ -44,7 +45,7 @@ public class MemoryDataJmapModule extends AbstractModule {
         bind(MemoryNotificationRegistry.class).in(Scopes.SINGLETON);
         bind(NotificationRegistry.class).to(MemoryNotificationRegistry.class);
 
-        bind(TikaTextExtractor.class).in(Scopes.SINGLETON);
-        bind(TextExtractor.class).to(TikaTextExtractor.class);
+        bind(DefaultTextExtractor.class).in(Scopes.SINGLETON);
+        bind(TextExtractor.class).to(JsoupTextExtractor.class);
     }
 }

http://git-wip-us.apache.org/repos/asf/james-project/blob/fbc09db2/server/container/guice/memory-guice/src/main/java/org/apache/james/modules/mailbox/MemoryMailboxModule.java
----------------------------------------------------------------------
diff --git a/server/container/guice/memory-guice/src/main/java/org/apache/james/modules/mailbox/MemoryMailboxModule.java b/server/container/guice/memory-guice/src/main/java/org/apache/james/modules/mailbox/MemoryMailboxModule.java
index e735349..4bc7a0a 100644
--- a/server/container/guice/memory-guice/src/main/java/org/apache/james/modules/mailbox/MemoryMailboxModule.java
+++ b/server/container/guice/memory-guice/src/main/java/org/apache/james/modules/mailbox/MemoryMailboxModule.java
@@ -33,6 +33,7 @@ import org.apache.james.mailbox.exception.MailboxException;
 import org.apache.james.mailbox.inmemory.InMemoryId;
 import org.apache.james.mailbox.inmemory.InMemoryMailboxManager;
 import org.apache.james.mailbox.inmemory.InMemoryMailboxSessionMapperFactory;
+import org.apache.james.mailbox.inmemory.JsoupTextExtractor;
 import org.apache.james.mailbox.inmemory.mail.InMemoryModSeqProvider;
 import org.apache.james.mailbox.inmemory.mail.InMemoryUidProvider;
 import org.apache.james.mailbox.model.MailboxId;
@@ -49,7 +50,6 @@ import org.apache.james.mailbox.store.mail.UidProvider;
 import org.apache.james.mailbox.store.search.MessageSearchIndex;
 import org.apache.james.mailbox.store.search.SimpleMessageSearchIndex;
 import org.apache.james.mailbox.store.user.SubscriptionMapperFactory;
-import org.apache.james.mailbox.tika.extractor.TikaTextExtractor;
 import org.apache.james.modules.Names;
 
 import com.google.inject.AbstractModule;
@@ -80,7 +80,7 @@ public class MemoryMailboxModule extends AbstractModule {
         bind(GroupMembershipResolver.class).to(SimpleGroupMembershipResolver.class);
 
         bind(MessageSearchIndex.class).to(SimpleMessageSearchIndex.class);
-        bind(TextExtractor.class).to(TikaTextExtractor.class);
+        bind(TextExtractor.class).to(JsoupTextExtractor.class);
 
         bind(InMemoryMailboxSessionMapperFactory.class).in(Scopes.SINGLETON);
         bind(InMemoryModSeqProvider.class).in(Scopes.SINGLETON);

http://git-wip-us.apache.org/repos/asf/james-project/blob/fbc09db2/server/protocols/jmap-integration-testing/jmap-integration-testing-common/src/test/java/org/apache/james/jmap/methods/integration/SetMessagesMethodTest.java
----------------------------------------------------------------------
diff --git a/server/protocols/jmap-integration-testing/jmap-integration-testing-common/src/test/java/org/apache/james/jmap/methods/integration/SetMessagesMethodTest.java b/server/protocols/jmap-integration-testing/jmap-integration-testing-common/src/test/java/org/apache/james/jmap/methods/integration/SetMessagesMethodTest.java
index af0dc11..2bd9003 100644
--- a/server/protocols/jmap-integration-testing/jmap-integration-testing-common/src/test/java/org/apache/james/jmap/methods/integration/SetMessagesMethodTest.java
+++ b/server/protocols/jmap-integration-testing/jmap-integration-testing-common/src/test/java/org/apache/james/jmap/methods/integration/SetMessagesMethodTest.java
@@ -37,6 +37,7 @@ import static org.hamcrest.Matchers.nullValue;
 import static org.hamcrest.collection.IsMapWithSize.aMapWithSize;
 import static org.hamcrest.collection.IsMapWithSize.anEmptyMap;
 
+
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.time.ZonedDateTime;
@@ -47,7 +48,7 @@ import java.util.concurrent.TimeUnit;
 
 import javax.mail.Flags;
 
-import org.apache.commons.compress.utils.IOUtils;
+import org.apache.commons.io.IOUtils;
 import org.apache.james.GuiceJamesServer;
 import org.apache.james.jmap.JmapAuthentication;
 import org.apache.james.jmap.api.access.AccessToken;

http://git-wip-us.apache.org/repos/asf/james-project/blob/fbc09db2/server/protocols/jmap-integration-testing/jmap-integration-testing-common/src/test/java/org/apache/james/jmap/methods/integration/cucumber/GetMessagesMethodStepdefs.java
----------------------------------------------------------------------
diff --git a/server/protocols/jmap-integration-testing/jmap-integration-testing-common/src/test/java/org/apache/james/jmap/methods/integration/cucumber/GetMessagesMethodStepdefs.java b/server/protocols/jmap-integration-testing/jmap-integration-testing-common/src/test/java/org/apache/james/jmap/methods/integration/cucumber/GetMessagesMethodStepdefs.java
index bdaaff1..e12a0f4 100644
--- a/server/protocols/jmap-integration-testing/jmap-integration-testing-common/src/test/java/org/apache/james/jmap/methods/integration/cucumber/GetMessagesMethodStepdefs.java
+++ b/server/protocols/jmap-integration-testing/jmap-integration-testing-common/src/test/java/org/apache/james/jmap/methods/integration/cucumber/GetMessagesMethodStepdefs.java
@@ -275,7 +275,8 @@ public class GetMessagesMethodStepdefs {
 
     @Then("^the preview of the message is \"([^\"]*)\"$")
     public void assertPreviewOfTheFirstMessage(String preview) throws Throwable {
-        assertThat(jsonPath.<String>read(FIRST_MESSAGE + ".preview")).isEqualTo(StringEscapeUtils.unescapeJava(preview));
+        String actual = jsonPath.<String>read(FIRST_MESSAGE + ".preview").replace("\n", " ");
+        assertThat(actual).isEqualToIgnoringWhitespace(StringEscapeUtils.unescapeJava(preview));
     }
 
     @Then("^the headers of the message contains:$")


---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org