You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by dm...@apache.org on 2014/07/31 20:29:32 UTC
svn commit: r1614950 - in /tika/tags/1.6: ./
tika-translate/src/main/java/org/apache/tika/language/translate/
tika-translate/src/main/resources/META-INF/services/
tika-translate/src/main/resources/org/apache/tika/language/translate/
tika-translate/src/...
Author: dmeikle
Date: Thu Jul 31 18:29:32 2014
New Revision: 1614950
URL: http://svn.apache.org/r1614950
Log:
TIKA-1381 - Added Lingo24Translator implementation
Added:
tika/tags/1.6/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java (with props)
tika/tags/1.6/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties (with props)
tika/tags/1.6/tika-translate/src/test/java/org/apache/tika/language/translate/Lingo24TranslatorTest.java (with props)
Modified:
tika/tags/1.6/CHANGES.txt
tika/tags/1.6/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator
Modified: tika/tags/1.6/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/tags/1.6/CHANGES.txt?rev=1614950&r1=1614949&r2=1614950&view=diff
==============================================================================
--- tika/tags/1.6/CHANGES.txt (original)
+++ tika/tags/1.6/CHANGES.txt Thu Jul 31 18:29:32 2014
@@ -19,6 +19,9 @@ Release 1.6 - 07/27/2014
languages and added a default implementation that calls Microsoft's
translate service (TIKA-1319)
+ * Added a Translator implementation that calls Lingo24's Premium
+ Machine Translation API (TIKA-1381)
+
* Made RTFParser's list handling slightly more robust against corrupt
list metadata (TIKA-1305)
Added: tika/tags/1.6/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java
URL: http://svn.apache.org/viewvc/tika/tags/1.6/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java?rev=1614950&view=auto
==============================================================================
--- tika/tags/1.6/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java (added)
+++ tika/tags/1.6/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java Thu Jul 31 18:29:32 2014
@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.language.translate;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.cxf.jaxrs.client.WebClient;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.language.LanguageIdentifier;
+import org.apache.tika.language.LanguageProfile;
+
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Properties;
+
+/**
+ * An implementation of a REST client for the
+ * <a href="https://developer.lingo24.com/premium-machine-translation-api">Premium MT API v1</a>.
+ * You can sign up for an access plan online on the <a href="https://developer.lingo24.com/plans">Lingo24 Developer Portal</a>
+ * and set your Application's User Key in the <code>translator.lingo24.properties</code> file.
+ * <p>
+ * The translator reports itself as unavailable when the properties file cannot be
+ * loaded, when no <code>translator.user-key</code> is configured, or when the key is
+ * still the placeholder value <code>dummy-key</code>. While unavailable, both
+ * <code>translate</code> methods return their input text unchanged.
+ */
+public class Lingo24Translator implements Translator {
+
+    private static final String LINGO24_TRANSLATE_URL_BASE = "https://api.lingo24.com/mt/v1/translate";
+
+    private static final String DEFAULT_KEY = "dummy-key";
+
+    private static final String PROPERTIES_RESOURCE =
+            "org/apache/tika/language/translate/translator.lingo24.properties";
+
+    /** JSON responses are decoded explicitly as UTF-8 instead of the platform default charset. */
+    private static final String RESPONSE_CHARSET = "UTF-8";
+
+    /** Shared JSON parser; built once instead of on every request. */
+    private static final ObjectMapper MAPPER = new ObjectMapper();
+
+    private String userKey;
+
+    private boolean isAvailable;
+
+    /**
+     * Loads the user key from <code>translator.lingo24.properties</code> on the classpath
+     * and decides whether the translator is available.
+     */
+    public Lingo24Translator() {
+        this.isAvailable = false;
+        InputStream stream = null;
+        try {
+            stream = Lingo24Translator.class.getClassLoader()
+                    .getResourceAsStream(PROPERTIES_RESOURCE);
+            // A missing resource simply leaves the translator unavailable.
+            if (stream != null) {
+                Properties config = new Properties();
+                config.load(stream);
+                this.userKey = config.getProperty("translator.user-key");
+                // A missing key is treated like the placeholder key: no translation.
+                this.isAvailable = this.userKey != null
+                        && !this.userKey.equals(DEFAULT_KEY);
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+            this.isAvailable = false;
+        } finally {
+            closeQuietly(stream);
+        }
+    }
+
+    /**
+     * Translates <code>text</code> from <code>sourceLanguage</code> to
+     * <code>targetLanguage</code> by calling the Lingo24 service.
+     *
+     * @param text the text to translate
+     * @param sourceLanguage language code of <code>text</code>
+     * @param targetLanguage language code to translate into
+     * @return the translated text, or <code>text</code> unchanged when the
+     *         translator is not available
+     * @throws TikaException if the response reports errors or contains no translation
+     * @throws Exception if the request or the response parsing fails
+     */
+    @Override
+    public String translate(String text, String sourceLanguage,
+            String targetLanguage) throws Exception {
+        if (!this.isAvailable)
+            return text;
+        // A fresh client per request: query parameters added through query() are
+        // kept on the WebClient instance, so a shared client would carry the
+        // parameters of earlier calls into later ones.
+        WebClient client = WebClient.create(LINGO24_TRANSLATE_URL_BASE);
+        Response response = client.accept(MediaType.APPLICATION_JSON)
+                .query("user_key", userKey).query("source", sourceLanguage)
+                .query("target", targetLanguage).query("q", text).get();
+
+        JsonNode jsonResp = MAPPER.readTree(readBody(response));
+        if (!jsonResp.findValuesAsText("errors").isEmpty()) {
+            JsonNode errors = jsonResp.findValue("errors");
+            JsonNode first = errors.get(0);
+            // Fall back to the raw node when "errors" is not a non-empty array.
+            throw new TikaException(first != null ? first.asText() : errors.toString());
+        }
+        java.util.List<String> translations = jsonResp.findValuesAsText("translation");
+        if (translations.isEmpty()) {
+            throw new TikaException("Lingo24 response did not contain a translation");
+        }
+        return translations.get(0);
+    }
+
+    /**
+     * Translates <code>text</code> to <code>targetLanguage</code>, detecting the source
+     * language with {@link LanguageIdentifier}.
+     *
+     * @param text the text to translate
+     * @param targetLanguage language code to translate into
+     * @return the translated text, or <code>text</code> unchanged when the
+     *         translator is not available
+     * @throws Exception if the underlying three-argument translation fails
+     */
+    @Override
+    public String translate(String text, String targetLanguage)
+            throws Exception {
+        if (!this.isAvailable)
+            return text;
+        LanguageIdentifier language = new LanguageIdentifier(
+                new LanguageProfile(text));
+        String sourceLanguage = language.getLanguage();
+        return translate(text, sourceLanguage, targetLanguage);
+    }
+
+    /**
+     * @return <code>true</code> when a real (non-placeholder) user key was loaded
+     */
+    @Override
+    public boolean isAvailable() {
+        return this.isAvailable;
+    }
+
+    /** Reads the whole response entity as text, line by line, and closes the stream. */
+    private static String readBody(Response response) throws Exception {
+        BufferedReader reader = new BufferedReader(new InputStreamReader(
+                (InputStream) response.getEntity(), RESPONSE_CHARSET));
+        try {
+            StringBuilder body = new StringBuilder();
+            String line;
+            while ((line = reader.readLine()) != null) {
+                body.append(line);
+            }
+            return body.toString();
+        } finally {
+            reader.close();
+        }
+    }
+
+    /** Closes <code>resource</code> if it is non-null, ignoring any failure on close. */
+    private static void closeQuietly(java.io.Closeable resource) {
+        if (resource == null) {
+            return;
+        }
+        try {
+            resource.close();
+        } catch (Exception ignored) {
+            // Nothing useful can be done if closing a read-only stream fails.
+        }
+    }
+
+}
Propchange: tika/tags/1.6/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/tags/1.6/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator
URL: http://svn.apache.org/viewvc/tika/tags/1.6/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator?rev=1614950&r1=1614949&r2=1614950&view=diff
==============================================================================
--- tika/tags/1.6/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator (original)
+++ tika/tags/1.6/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator Thu Jul 31 18:29:32 2014
@@ -15,4 +15,5 @@
org.apache.tika.language.translate.MicrosoftTranslator
org.apache.tika.language.translate.GoogleTranslator
+org.apache.tika.language.translate.Lingo24Translator
org.apache.tika.language.translate.CachedTranslator
\ No newline at end of file
Added: tika/tags/1.6/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties
URL: http://svn.apache.org/viewvc/tika/tags/1.6/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties?rev=1614950&view=auto
==============================================================================
--- tika/tags/1.6/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties (added)
+++ tika/tags/1.6/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties Thu Jul 31 18:29:32 2014
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Must set the client keys in this file to use translation. Please see
+# https://code.google.com/p/microsoft-translator-java-api/ and
+# http://msdn.microsoft.com/en-us/library/hh454950.aspx for help with
+# getting these keys. As of now (6/2014) 2,000,000 characters/month
+# are free.
+#
+# To use the Lingo24 translation service, you must set your API-key
+# as described in Lingo24Translator. If you do not want translation
+# please set the value to "dummy-key".
+
+translator.user-key=dummy-key
Propchange: tika/tags/1.6/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties
------------------------------------------------------------------------------
svn:eol-style = native
Added: tika/tags/1.6/tika-translate/src/test/java/org/apache/tika/language/translate/Lingo24TranslatorTest.java
URL: http://svn.apache.org/viewvc/tika/tags/1.6/tika-translate/src/test/java/org/apache/tika/language/translate/Lingo24TranslatorTest.java?rev=1614950&view=auto
==============================================================================
--- tika/tags/1.6/tika-translate/src/test/java/org/apache/tika/language/translate/Lingo24TranslatorTest.java (added)
+++ tika/tags/1.6/tika-translate/src/test/java/org/apache/tika/language/translate/Lingo24TranslatorTest.java Thu Jul 31 18:29:32 2014
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.language.translate;
+
+import junit.framework.TestCase;
+import org.junit.Before;
+
+/**
+ * Test harness for the {@link org.apache.tika.language.translate.Lingo24Translator}.
+ * <p>
+ * Both tests call the translator for real, so they only assert anything when
+ * {@link Lingo24Translator#isAvailable()} returns <code>true</code>; otherwise
+ * they return without checking a translation.
+ */
+public class Lingo24TranslatorTest extends TestCase {
+
+    private Lingo24Translator translator;
+
+    @Before
+    public void setUp() {
+        translator = new Lingo24Translator();
+    }
+
+    /**
+     * Translates a Spanish sentence to English with an explicit source language.
+     */
+    public void testSimpleTranslate() throws Exception {
+        String source = "Hola, hoy es un día genial para traducir";
+        String expected = "Hello, today is a great day to translate";
+
+        if (!translator.isAvailable()) {
+            return;
+        }
+        // Exceptions propagate so the test report keeps the full stack trace.
+        String result = translator.translate(source, "es", "en");
+        assertNotNull(result);
+        assertEquals("Result: [" + result
+                + "]: not equal to expected: [" + expected + "]",
+                expected, result);
+    }
+
+    /**
+     * Translates a French sentence to English, letting the translator detect
+     * the source language.
+     */
+    public void testTranslateGuessLanguage() throws Exception {
+        String source = "C'est une merveilleuse journée pour traduction";
+        String expected = "It is a wonderful day for translation";
+
+        if (!translator.isAvailable()) {
+            return;
+        }
+        // Exceptions propagate so the test report keeps the full stack trace.
+        String result = translator.translate(source, "en");
+        assertNotNull(result);
+        assertEquals("Result: [" + result
+                + "]: not equal to expected: [" + expected + "]",
+                expected, result);
+    }
+
+}
+
Propchange: tika/tags/1.6/tika-translate/src/test/java/org/apache/tika/language/translate/Lingo24TranslatorTest.java
------------------------------------------------------------------------------
svn:eol-style = native