You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2014/07/08 01:20:54 UTC

svn commit: r1608628 - in /tika/trunk/tika-translate: ./ src/main/java/org/apache/tika/language/translate/ src/main/resources/org/apache/tika/language/translate/ src/test/java/org/apache/tika/language/translate/

Author: mattmann
Date: Mon Jul  7 23:20:53 2014
New Revision: 1608628

URL: http://svn.apache.org/r1608628
Log:
- fix for TIKA-1362: Add GoogleTranslate implementation of Translation API

Added:
    tika/trunk/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
    tika/trunk/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.google.properties
    tika/trunk/tika-translate/src/test/java/org/apache/tika/language/translate/GoogleTranslatorTest.java
Modified:
    tika/trunk/tika-translate/pom.xml

Modified: tika/trunk/tika-translate/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-translate/pom.xml?rev=1608628&r1=1608627&r2=1608628&view=diff
==============================================================================
--- tika/trunk/tika-translate/pom.xml (original)
+++ tika/trunk/tika-translate/pom.xml Mon Jul  7 23:20:53 2014
@@ -46,6 +46,16 @@
       <version>0.6.2</version>
       <type>jar</type>
     </dependency>
+    <dependency>
+      <groupId>org.apache.cxf</groupId>
+      <artifactId>cxf-rt-frontend-jaxrs</artifactId>
+      <version>2.7.8</version>
+    </dependency>
+	<dependency>
+	  <groupId>com.fasterxml.jackson.jaxrs</groupId>
+	  <artifactId>jackson-jaxrs-json-provider</artifactId>
+	  <version>2.4.0</version>
+	</dependency>        
 
     <!-- Test dependencies -->
     <dependency>

Added: tika/trunk/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java?rev=1608628&view=auto
==============================================================================
--- tika/trunk/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java (added)
+++ tika/trunk/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java Mon Jul  7 23:20:53 2014
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.language.translate;
+
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Properties;
+import java.util.logging.Logger;
+
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+
+import org.apache.cxf.jaxrs.client.WebClient;
+import org.apache.tika.language.LanguageIdentifier;
+import org.apache.tika.language.LanguageProfile;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * An implementation of a REST client to the <a
+ * href="https://www.googleapis.com/language/translate/v2">Google Translate v2
+ * API</a>. Based on the <a
+ * href="http://hayageek.com/google-translate-api-tutorial/">great tutorial</a>
+ * from <a href="http://hayageek.com">hayageek.com</a>. Set your API key with
+ * the <code>translator.client-secret</code> property in
+ * <code>translator.google.properties</code>.
+ * <p>
+ * While the properties file cannot be read, the key is missing, or the key is
+ * still the placeholder <code>dummy-secret</code>, the translator reports
+ * itself as unavailable and returns the input text untranslated.
+ */
+public class GoogleTranslator implements Translator {
+
+	private static final String GOOGLE_TRANSLATE_URL_BASE = "https://www.googleapis.com/language/translate/v2";
+
+	/** Classpath location of the configuration holding the API key. */
+	private static final String CONFIG_RESOURCE = "org/apache/tika/language/translate/translator.google.properties";
+
+	/** Name of the property that holds the API key. */
+	private static final String API_KEY_PROPERTY = "translator.client-secret";
+
+	/** Placeholder key shipped by default; it disables translation. */
+	private static final String DEFAULT_KEY = "dummy-secret";
+
+	private static final Logger LOG = Logger.getLogger(GoogleTranslator.class
+			.getName());
+
+	/** Shared JSON parser; built once instead of on every call. */
+	private static final ObjectMapper MAPPER = new ObjectMapper();
+
+	private final WebClient client;
+
+	private final String apiKey;
+
+	private final boolean isAvailable;
+
+	/**
+	 * Creates the REST client and reads the API key from
+	 * <code>translator.google.properties</code> on the classpath. Construction
+	 * never fails because of configuration problems; they only make the
+	 * translator unavailable.
+	 */
+	public GoogleTranslator() {
+		this.client = WebClient.create(GOOGLE_TRANSLATE_URL_BASE);
+		this.apiKey = loadApiKey();
+		this.isAvailable = this.apiKey != null
+				&& !DEFAULT_KEY.equals(this.apiKey);
+	}
+
+	/**
+	 * Reads the API key from the bundled properties file.
+	 * 
+	 * @return the configured key, or <code>null</code> when the file or the
+	 *         property is missing or the file cannot be read
+	 */
+	private static String loadApiKey() {
+		InputStream stream = GoogleTranslator.class.getClassLoader()
+				.getResourceAsStream(CONFIG_RESOURCE);
+		if (stream == null) {
+			LOG.warning("Translator configuration not found on classpath: "
+					+ CONFIG_RESOURCE);
+			return null;
+		}
+		try {
+			Properties config = new Properties();
+			config.load(stream);
+			String key = config.getProperty(API_KEY_PROPERTY);
+			if (key == null) {
+				LOG.warning("Property " + API_KEY_PROPERTY + " is not set in "
+						+ CONFIG_RESOURCE);
+			}
+			return key;
+		} catch (Exception e) {
+			// Best effort: a broken configuration must not break construction.
+			LOG.log(java.util.logging.Level.WARNING, "Unable to read "
+					+ CONFIG_RESOURCE, e);
+			return null;
+		} finally {
+			try {
+				stream.close();
+			} catch (java.io.IOException ignored) {
+				// Nothing useful can be done if closing a classpath stream fails.
+			}
+		}
+	}
+
+	/**
+	 * Translates text between two explicitly given languages.
+	 * 
+	 * @param text
+	 *            the text to translate
+	 * @param sourceLanguage
+	 *            language code of <code>text</code>, sent as the
+	 *            <code>source</code> query parameter
+	 * @param targetLanguage
+	 *            language code to translate into, sent as the
+	 *            <code>target</code> query parameter
+	 * @return the first <code>translatedText</code> value of the response, or
+	 *         <code>text</code> unchanged when the translator is unavailable
+	 * @throws Exception
+	 *             if the request fails, or the response has no body or no
+	 *             <code>translatedText</code> value
+	 */
+	@Override
+	public String translate(String text, String sourceLanguage,
+			String targetLanguage) throws Exception {
+		if (!this.isAvailable) {
+			return text;
+		}
+		// NOTE(review): the WebClient instance is shared between calls and
+		// query(...) presumably keeps its parameters on that instance, so
+		// repeated or concurrent calls may accumulate query parameters - TODO
+		// confirm against the CXF WebClient documentation.
+		Response response = client.accept(MediaType.APPLICATION_JSON)
+				.query("key", apiKey).query("source", sourceLanguage)
+				.query("target", targetLanguage).query("q", text).get();
+		InputStream entity = (InputStream) response.getEntity();
+		if (entity == null) {
+			throw new java.io.IOException(
+					"Google Translate returned no response body (HTTP "
+							+ response.getStatus() + ")");
+		}
+
+		// JSON is decoded as UTF-8 explicitly; the platform default charset
+		// would corrupt non-ASCII translations on some systems.
+		BufferedReader reader = new BufferedReader(new InputStreamReader(
+				entity, "UTF-8"));
+		JsonNode jsonResp;
+		try {
+			jsonResp = MAPPER.readTree(reader);
+		} finally {
+			reader.close();
+		}
+
+		JsonNode translated = jsonResp == null ? null : jsonResp
+				.findValue("translatedText");
+		if (translated == null) {
+			// Typically an error payload, e.g. a rejected API key.
+			throw new java.io.IOException(
+					"Google Translate response contained no translatedText (HTTP "
+							+ response.getStatus() + ")");
+		}
+		return translated.asText();
+	}
+
+	/**
+	 * Translates text after detecting its source language with
+	 * {@link LanguageIdentifier}.
+	 * 
+	 * @param text
+	 *            the text to translate
+	 * @param targetLanguage
+	 *            language code to translate into
+	 * @return the translated text, or <code>text</code> unchanged when the
+	 *         translator is unavailable
+	 * @throws Exception
+	 *             if the underlying translation request fails
+	 */
+	@Override
+	public String translate(String text, String targetLanguage)
+			throws Exception {
+		if (!this.isAvailable) {
+			return text;
+		}
+		LanguageIdentifier language = new LanguageIdentifier(
+				new LanguageProfile(text));
+		String sourceLanguage = language.getLanguage();
+		return translate(text, sourceLanguage, targetLanguage);
+	}
+
+	/**
+	 * @return <code>true</code> when an API key other than the placeholder was
+	 *         loaded from the configuration
+	 */
+	@Override
+	public boolean isAvailable() {
+		return this.isAvailable;
+	}
+
+}

Added: tika/trunk/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.google.properties
URL: http://svn.apache.org/viewvc/tika/trunk/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.google.properties?rev=1608628&view=auto
==============================================================================
--- tika/trunk/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.google.properties (added)
+++ tika/trunk/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.google.properties Mon Jul  7 23:20:53 2014
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
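+#
+# You must set the API key in this file to use translation. See the Javadoc
+# of GoogleTranslator, which links to the Google Translate v2 API and to a
+# tutorial (http://hayageek.com/google-translate-api-tutorial/).
+#
+# NOTE: the references previously given here,
+# https://code.google.com/p/microsoft-translator-java-api/ and
+# http://msdn.microsoft.com/en-us/library/hh454950.aspx, describe the
+# Microsoft translator rather than this Google service; the free quota of
+# 2,000,000 characters/month (as of 6/2014) mentioned with them presumably
+# refers to the Microsoft service as well.
+#
+# To use the Google translation service, you must set your API key as the
+# value of translator.client-secret below. If you do not want translation,
+# leave the value set to "dummy-secret".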
+
+translator.client-secret=dummy-secret

Added: tika/trunk/tika-translate/src/test/java/org/apache/tika/language/translate/GoogleTranslatorTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-translate/src/test/java/org/apache/tika/language/translate/GoogleTranslatorTest.java?rev=1608628&view=auto
==============================================================================
--- tika/trunk/tika-translate/src/test/java/org/apache/tika/language/translate/GoogleTranslatorTest.java (added)
+++ tika/trunk/tika-translate/src/test/java/org/apache/tika/language/translate/GoogleTranslatorTest.java Mon Jul  7 23:20:53 2014
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.language.translate;
+
+import org.junit.Before;
+
+import junit.framework.TestCase;
+
+/**
+ * Test harness for the {@link GoogleTranslator}.
+ * <p>
+ * The tests only exercise the translator when it reports itself as
+ * available; with no API key configured they return early and pass without
+ * asserting anything.
+ */
+public class GoogleTranslatorTest extends TestCase {
+
+	private GoogleTranslator translator;
+
+	public GoogleTranslatorTest() {
+
+	}
+
+	/**
+	 * Creates a fresh translator before each test. This class extends the
+	 * JUnit 3 {@link TestCase}, which invokes <code>setUp()</code> by
+	 * overriding; the <code>@Before</code> annotation is kept only for
+	 * readers used to JUnit 4.
+	 */
+	@Before
+	@Override
+	public void setUp() {
+		translator = new GoogleTranslator();
+	}
+
+	/**
+	 * Translates a Spanish phrase to English with both languages given.
+	 * 
+	 * @throws Exception
+	 *             propagated so that JUnit reports the full stack trace
+	 *             instead of only the exception message
+	 */
+	public void testSimpleTranslate() throws Exception {
+		if (!translator.isAvailable()) {
+			// No API key configured: nothing to verify.
+			return;
+		}
+		String source = "hola senor";
+		String expected = "hello sir";
+
+		String result = translator.translate(source, "es", "en");
+		assertNotNull(result);
+		assertEquals("Result: [" + result + "]: not equal to expected: ["
+				+ expected + "]", expected, result);
+	}
+
+	/**
+	 * Translates a French phrase to English, letting the translator detect
+	 * the source language.
+	 * 
+	 * @throws Exception
+	 *             propagated so that JUnit reports the full stack trace
+	 *             instead of only the exception message
+	 */
+	public void testTranslateGuessLanguage() throws Exception {
+		if (!translator.isAvailable()) {
+			// No API key configured: nothing to verify.
+			return;
+		}
+		String source = "bonjour comment vas-tu";
+		String expected = "hello how are you";
+
+		String result = translator.translate(source, "en");
+		assertNotNull(result);
+		assertEquals("Result: [" + result + "]: not equal to expected: ["
+				+ expected + "]", expected, result);
+	}
+
+}