You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/05/31 18:25:27 UTC
svn commit: r1344800 - in
/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main:
java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java
resources/OSGI-INF/metatype/metatype.properties
Author: rwesten
Date: Thu May 31 16:25:27 2012
New Revision: 1344800
URL: http://svn.apache.org/viewvc?rev=1344800&view=rev
Log:
STANBOL-583: Some minor improvements
* Changed the HttpClient of the Language identification service to be similar to the clients of the other engines.
* added the engine.name property to the metatype.properties file
* documented that users need to add {user}:{password} as license key
Modified:
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java?rev=1344800&r1=1344799&r2=1344800&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java Thu May 31 16:25:27 2012
@@ -1,12 +1,17 @@
package org.apache.stanbol.enhancer.engines.celi.langid.impl;
+import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.IOException;
+import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
import java.net.URL;
+import java.nio.charset.Charset;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.Vector;
import javax.xml.soap.MessageFactory;
@@ -19,56 +24,53 @@ import javax.xml.transform.stream.Stream
import org.apache.clerezza.rdf.core.impl.util.Base64;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.stanbol.enhancer.engines.celi.utils.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
public class LanguageIdentifierClientHTTP {
+ /**
+ * The UTF-8 {@link Charset}
+ */
+ private static final Charset UTF8 = Charset.forName("UTF-8");
+ /**
+ * The content type "text/xml; charset={@link #UTF8}"
+ */
+ private static final String CONTENT_TYPE = "text/xml; charset="+UTF8.name();
+ /**
+ * The XML version, encoding; SOAP envelope, header and starting element of the body;
+ * processTextRequest and text starting element.
+ */
+ private static final String SOAP_PREFIX = "<soapenv:Envelope "
+ + "xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\" "
+ + "xmlns:lan=\"http://research.celi.it/LanguageIdentifierWS\">"
+ + "<soapenv:Header/><soapenv:Body>";
+ /**
+ * closes the text, processTextRequest, SOAP body and envelope
+ */
+ private static final String SOAP_SUFFIX = "</soapenv:Body></soapenv:Envelope>";
+
private URL serviceEP;
- private String licenseKey;
+ private final Map<String,String> requestHeaders;
private final Logger log = LoggerFactory.getLogger(getClass());
public LanguageIdentifierClientHTTP(URL serviceUrl, String licenseKey){
this.serviceEP=serviceUrl;
- this.licenseKey=licenseKey;
+ Map<String,String> headers = new HashMap<String,String>();
+ headers.put("Content-Type", CONTENT_TYPE);
+ if(licenseKey != null){
+ String encoded = Base64.encode(licenseKey.getBytes(UTF8));
+ headers.put("Authorization", "Basic "+encoded);
+ }
+ this.requestHeaders = Collections.unmodifiableMap(headers);
}
- public String doPostRequest(URL url, String body) throws IOException {
- HttpURLConnection urlConn = (HttpURLConnection) url.openConnection();
- urlConn.setRequestMethod("POST");
- urlConn.setDoInput(true);
- if (null != body) {
- urlConn.setDoOutput(true);
- } else {
- urlConn.setDoOutput(false);
- }
- urlConn.setUseCaches(false);
- String contentType = "text/xml; charset=utf-8";
- urlConn.setRequestProperty("Content-Type", contentType);
- if(this.licenseKey!=null){
- String encoded = Base64.encode(this.licenseKey.getBytes("UTF-8"));
- urlConn.setRequestProperty("Authorization", "Basic "+encoded);
- }
-
- // send POST output
- if (null != body) {
- OutputStreamWriter printout = new OutputStreamWriter(urlConn.getOutputStream(), "UTF-8");
- printout.write(body);
- printout.flush();
- printout.close();
- }
-
- //close connection
- urlConn.disconnect();
-
- // get response data
- return IOUtils.toString(urlConn.getInputStream(), "UTF8");
- }
//NOTE (rwesten): I rather do the error handling in the EnhancementEngine!
@@ -76,29 +78,35 @@ public class LanguageIdentifierClientHTT
if(text == null || text.isEmpty()){ // no text
return Collections.emptyList(); //no language
}
- List<GuessedLanguage> guesses = new Vector<GuessedLanguage>();
-
- String txt = StringEscapeUtils.escapeXml(text);
- String xmldata = "<soapenv:Envelope xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\" xmlns:lan=\"http://research.celi.it/LanguageIdentifierWS\"><soapenv:Header/><soapenv:Body>"
- +"<lan:guessQueryLanguage><textToGuess>"+txt+"</textToGuess></lan:guessQueryLanguage></soapenv:Body></soapenv:Envelope>";
-
-
- String responseXml = doPostRequest(this.serviceEP, xmldata);
- log.debug(responseXml);
+ //create the POST request
+ HttpURLConnection con = Utils.createPostRequest(serviceEP, requestHeaders);
+ //write content
+ BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(con.getOutputStream(),UTF8));
+ writer.write(SOAP_PREFIX);
+ writer.write("<lan:guessQueryLanguage><textToGuess>");
+ StringEscapeUtils.escapeXml(writer, text);
+ writer.write("</textToGuess></lan:guessQueryLanguage>");
+ writer.write(SOAP_SUFFIX);
+ writer.close();
+ //Call the service
+ long start = System.currentTimeMillis();
+ InputStream stream = con.getInputStream();
+ log.debug("Request to {} took {}ms",serviceEP,System.currentTimeMillis()-start);
+
+ // Create SoapMessage and parse the results
+ MessageFactory msgFactory = MessageFactory.newInstance();
+ SOAPMessage message = msgFactory.createMessage();
+ SOAPPart soapPart = message.getSOAPPart();
+
+ // Load the SOAP text into a stream source
+ StreamSource source = new StreamSource(stream);
- // Create SoapMessage
- MessageFactory msgFactory = MessageFactory.newInstance();
- SOAPMessage message = msgFactory.createMessage();
- SOAPPart soapPart = message.getSOAPPart();
-
- // Load the SOAP text into a stream source
- ByteArrayInputStream stream = new ByteArrayInputStream(responseXml.getBytes("UTF-8"));
- StreamSource source = new StreamSource(stream);
+ // Set contents of message
+ soapPart.setContent(source);
- // Set contents of message
- soapPart.setContent(source);
+ SOAPBody soapBody = message.getSOAPBody();
- SOAPBody soapBody = message.getSOAPBody();
+ List<GuessedLanguage> guesses = new Vector<GuessedLanguage>();
NodeList nlist = soapBody.getElementsByTagNameNS("*","return");
for (int i = 0; i < nlist.getLength(); i++) {
try {
@@ -118,31 +126,40 @@ public class LanguageIdentifierClientHTT
//NOTE (rwesten): I rather do the error handling in the EnhancementEngine!
public List<GuessedLanguage> guessLanguage(String text) throws IOException,SOAPException {
+ if(text == null || text.isEmpty()){
+ //no text -> no language
+ return Collections.emptyList();
+ }
+ //create the POST request
+ HttpURLConnection con = Utils.createPostRequest(serviceEP, requestHeaders);
+ //write content
+ BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(con.getOutputStream(),UTF8));
+ writer.write(SOAP_PREFIX);
+ writer.write("<lan:guessLanguage><textToGuess>");
+ StringEscapeUtils.escapeXml(writer, text);
+ writer.write("</textToGuess></lan:guessLanguage>");
+ writer.write(SOAP_SUFFIX);
+ writer.close();
+ //Call the service
+ long start = System.currentTimeMillis();
+ InputStream stream = con.getInputStream();
+ log.debug("Request to {} took {}ms",serviceEP,System.currentTimeMillis()-start);
+
+ // Create SoapMessage and parse the results
+ MessageFactory msgFactory = MessageFactory.newInstance();
+ SOAPMessage message = msgFactory.createMessage();
+ SOAPPart soapPart = message.getSOAPPart();
+
+ // Load the SOAP text into a stream source
+ StreamSource source = new StreamSource(stream);
- List<GuessedLanguage> guesses = new Vector<GuessedLanguage>();
-
- String txt = StringEscapeUtils.escapeXml(text);
- String xmldata = "<soapenv:Envelope xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\" xmlns:lan=\"http://research.celi.it/LanguageIdentifierWS\"><soapenv:Header/><soapenv:Body>"
- +"<lan:guessLanguage><textToGuess>"+txt+"</textToGuess></lan:guessLanguage></soapenv:Body></soapenv:Envelope>";
- //NOTE (rwesten) I think this should be also the #serviceEP
- //URI uri = new URI("http://linguagrid.org/LSGrid/ws/language-identifier");
-
- String responseXml = doPostRequest(serviceEP, xmldata);
- log.debug(responseXml);
+ // Set contents of message
+ soapPart.setContent(source);
- // Create SoapMessage
- MessageFactory msgFactory = MessageFactory.newInstance();
- SOAPMessage message = msgFactory.createMessage();
- SOAPPart soapPart = message.getSOAPPart();
-
- // Load the SOAP text into a stream source
- ByteArrayInputStream stream = new ByteArrayInputStream(responseXml.getBytes("UTF-8"));
- StreamSource source = new StreamSource(stream);
+ SOAPBody soapBody = message.getSOAPBody();
- // Set contents of message
- soapPart.setContent(source);
+ List<GuessedLanguage> guesses = new Vector<GuessedLanguage>();
- SOAPBody soapBody = message.getSOAPBody();
NodeList nlist = soapBody.getElementsByTagNameNS("*","return");
for (int i = 0; i < nlist.getLength(); i++) {
try {
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1344800&r1=1344799&r2=1344800&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties Thu May 31 16:25:27 2012
@@ -19,6 +19,15 @@
# descriptions as used in the metatype.xml descriptor generated by the
# maven SCR plugin
+stanbol.enhancer.engine.name.name=Name
+stanbol.enhancer.engine.name.description=The name of the enhancement engine as \
+used in the RESTful interface '/engine/<name>'
+
+service.ranking.name=Ranking
+service.ranking.description=If two enhancement engines with the same name are active the \
+one with the higher ranking will be used to process parsed content items.
+
+
#===============================================================================
#Properties and Options used to configure CELI enhancement
#===============================================================================
@@ -26,8 +35,8 @@
#Key for the License key - used by all engines (see also CeliConstants interface)
celi.license.name=License Key
-celi.license.description=The key needed to access the CELI Named Entity Recognizer \
-Web Service. This can be also set as OSGI framework or system property. An engine \
+celi.license.description=The "{user}:{password}" of the used CELI account. \
+This can be also set as OSGI framework or system property. An engine \
specific configuration will override a framework/system wide configuration.
#NER