You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by le...@apache.org on 2016/10/26 02:37:12 UTC

[7/7] tika git commit: TIKA-1343 Create a Tika Translator implementation that uses JoshuaDecoder

TIKA-1343 Create a Tika Translator implementation that uses JoshuaDecoder


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/dadbf55c
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/dadbf55c
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/dadbf55c

Branch: refs/heads/master
Commit: dadbf55c51d166846aa0d365fd2ed340b604bfae
Parents: 5657ae6
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Mon Oct 24 22:20:04 2016 -0700
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Mon Oct 24 22:20:04 2016 -0700

----------------------------------------------------------------------
 .../translate/JoshuaNetworkTranslator.java      | 44 ++++++++++----------
 ...rg.apache.tika.language.translate.Translator |  3 +-
 .../translate/translator.joshua.properties      |  2 +-
 .../translate/JoshuaNetworkTranslatorTest.java  |  2 +-
 4 files changed, 27 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/dadbf55c/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java
----------------------------------------------------------------------
diff --git a/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java b/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java
index 8e1f768..8cf0adf 100644
--- a/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java
+++ b/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java
@@ -22,6 +22,8 @@ import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Properties;
 
 import javax.ws.rs.core.MediaType;
@@ -35,6 +37,8 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.core.JsonParseException;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.fasterxml.jackson.jaxrs.json.JacksonJsonProvider;
 
 /**
  * <p>This translator is designed to work with a TCP-IP available
@@ -57,7 +61,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
  * so this translation implementation takes care of that.
  */
 public class JoshuaNetworkTranslator extends AbstractTranslator {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(JoshuaNetworkTranslator.class);
 
   private static final String PROPERTIES_FILE = "translator.joshua.properties";
@@ -65,8 +69,6 @@ public class JoshuaNetworkTranslator extends AbstractTranslator {
   private static final String JOSHUA_SERVER = "joshua.server.url";
 
   private String networkServer;
-  
-  private WebClient client;
 
   /**
    * Default constructor which first checks for the presence of
@@ -124,40 +126,40 @@ public class JoshuaNetworkTranslator extends AbstractTranslator {
     }
 
     String inputText = sb.toString();
+    WebClient client;
+    final List<Object> providers = new ArrayList<>();
+    JacksonJsonProvider jacksonJsonProvider = new JacksonJsonProvider();
+    providers.add(jacksonJsonProvider);
 
     //create client
     if (!networkServer.endsWith("/")) {
-      client = WebClient.create(networkServer + "/" + targetLanguage + "/");
+      client = WebClient.create(networkServer + "/" + targetLanguage, providers);
     } else {
-      client = WebClient.create(networkServer + targetLanguage + "/");
+      client = WebClient.create(networkServer + targetLanguage, providers);
     }
 
+    ObjectMapper requestMapper = new ObjectMapper();
+    ObjectNode jsonNode = requestMapper.createObjectNode();
+    jsonNode.put("inputLanguage", sourceLanguage);
+    jsonNode.put("inputText", inputText);
     //make the reuest
-    Response response = client.accept(MediaType.APPLICATION_JSON)
-        .query("inputLanguage", sourceLanguage)
-        .query("inputText", inputText).get();
+    Response response = client.accept(MediaType.APPLICATION_JSON).type(MediaType.APPLICATION_JSON).post(jsonNode);
     BufferedReader reader = new BufferedReader(new InputStreamReader(
         (InputStream) response.getEntity(), UTF_8));
     String line;
-    StringBuffer responseText = new StringBuffer();
+    StringBuilder responseText = new StringBuilder();
     while ((line = reader.readLine()) != null) {
       responseText.append(line);
     }
 
     try {
-      ObjectMapper mapper = new ObjectMapper();
-      JsonNode jsonResp = mapper.readTree(responseText.toString());
-
-      if (!jsonResp.findValuesAsText("code").isEmpty()) {
-        String code = jsonResp.findValuesAsText("code").get(0);
-        if ("200".equals(code)) {
-          return jsonResp.findValue("text").get(0).asText();
-        } else {
-          throw new TikaException(jsonResp.findValue("message").get(0).asText());
-        }
+      ObjectMapper responseMapper = new ObjectMapper();
+      JsonNode jsonResp = responseMapper.readTree(responseText.toString());
+
+      if (jsonResp.findValuesAsText("outputText") != null) {
+        return jsonResp.findValuesAsText("outputText").get(0);
       } else {
-        throw new TikaException("Return message not recognized: " + 
-            responseText.toString().substring(0, Math.min(responseText.length(), 100)));
+        throw new TikaException(jsonResp.findValue("message").get(0).asText());
       }
     } catch (JsonParseException e) {
       throw new TikaException("Error requesting translation from '" + 

http://git-wip-us.apache.org/repos/asf/tika/blob/dadbf55c/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator
----------------------------------------------------------------------
diff --git a/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator b/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator
index 773daf3..f3dcad4 100644
--- a/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator
+++ b/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator
@@ -16,4 +16,5 @@
 org.apache.tika.language.translate.MicrosoftTranslator
 org.apache.tika.language.translate.GoogleTranslator
 org.apache.tika.language.translate.Lingo24Translator
-org.apache.tika.language.translate.CachedTranslator
\ No newline at end of file
+org.apache.tika.language.translate.CachedTranslator
+org.apache.tika.language.translate.JoshuaNetworkTranslator

http://git-wip-us.apache.org/repos/asf/tika/blob/dadbf55c/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties
----------------------------------------------------------------------
diff --git a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties
index 4894f48..53bd773 100644
--- a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties
+++ b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties
@@ -19,4 +19,4 @@
 # if left as null, then translation will not occur and the source text
 # will be returned. 
 # An example would be http://localhost:5000/joshua/translate/
-joshua.server=http://localhost:5000/joshua/translate/
\ No newline at end of file
+joshua.server.url=http://localhost:5000/joshua/translate/
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/dadbf55c/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java
----------------------------------------------------------------------
diff --git a/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java b/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java
index 2cf7b3a..4413926 100644
--- a/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java
+++ b/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java
@@ -34,7 +34,7 @@ public class JoshuaNetworkTranslatorTest {
   public void testSimpleSpanishToEnglishTranslation() throws Exception {
     String source = "hola";
     String expected = "hello";
-    String translated = translator.translate(source, "es", "en");
+    String translated = translator.translate(source, "spanish", "english");
     if (translator.isAvailable()) assertTrue("Translate " + source + " to " + expected + " (was " + translated + ")",
         expected.equalsIgnoreCase(translated));
   }