You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/05/31 18:25:27 UTC

svn commit: r1344800 - in /incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main: java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java resources/OSGI-INF/metatype/metatype.properties

Author: rwesten
Date: Thu May 31 16:25:27 2012
New Revision: 1344800

URL: http://svn.apache.org/viewvc?rev=1344800&view=rev
Log:
STANBOL-583: Some minor improvements

* Changed the HttpClient of the Language identification service to be similar to the clients of the other engines.
* added the engine.name property to the metatype.properties file
* documented that users need to add {user}:{password} as license key

Modified:
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java?rev=1344800&r1=1344799&r2=1344800&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java Thu May 31 16:25:27 2012
@@ -1,12 +1,17 @@
 package org.apache.stanbol.enhancer.engines.celi.langid.impl;
 
+import java.io.BufferedWriter;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.OutputStreamWriter;
 import java.net.HttpURLConnection;
 import java.net.URL;
+import java.nio.charset.Charset;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Vector;
 
 import javax.xml.soap.MessageFactory;
@@ -19,56 +24,53 @@ import javax.xml.transform.stream.Stream
 import org.apache.clerezza.rdf.core.impl.util.Base64;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.stanbol.enhancer.engines.celi.utils.Utils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
 
 public class LanguageIdentifierClientHTTP {
+    /**
+     * The UTF-8 {@link Charset}
+     */
+    private static final Charset UTF8 = Charset.forName("UTF-8");
+    /**
+     * The content type "text/xml; charset={@link #UTF8}"
+     */
+    private static final String CONTENT_TYPE = "text/xml; charset="+UTF8.name();
 	
+    /**
+     * The opening SOAP envelope (including the namespace declarations), the empty
+     * SOAP header and the starting element of the SOAP body.
+     */
+    private static final String SOAP_PREFIX = "<soapenv:Envelope " 
+            + "xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\" "
+            + "xmlns:lan=\"http://research.celi.it/LanguageIdentifierWS\">"
+            + "<soapenv:Header/><soapenv:Body>";
+    /**
+     * Closes the SOAP body and the SOAP envelope.
+     */
+    private static final String SOAP_SUFFIX = "</soapenv:Body></soapenv:Envelope>";
+    
 	private URL serviceEP;
-	private String licenseKey;
+    private final Map<String,String> requestHeaders;
 	
 	private final Logger log = LoggerFactory.getLogger(getClass());
 
 	
 	public LanguageIdentifierClientHTTP(URL serviceUrl, String licenseKey){
 		this.serviceEP=serviceUrl;
-		this.licenseKey=licenseKey;
+        Map<String,String> headers = new HashMap<String,String>();
+        headers.put("Content-Type", CONTENT_TYPE);
+        if(licenseKey != null){
+            String encoded = Base64.encode(licenseKey.getBytes(UTF8));
+            headers.put("Authorization", "Basic "+encoded);
+        }
+        this.requestHeaders = Collections.unmodifiableMap(headers);
 	}
 		
 	
-	public String doPostRequest(URL url, String body) throws IOException {
-		HttpURLConnection urlConn = (HttpURLConnection) url.openConnection();
-		urlConn.setRequestMethod("POST");
-		urlConn.setDoInput(true);
-		if (null != body) {
-			urlConn.setDoOutput(true);
-		} else {
-			urlConn.setDoOutput(false);
-		}
-		urlConn.setUseCaches(false);
-		String	contentType = "text/xml; charset=utf-8";
-		urlConn.setRequestProperty("Content-Type", contentType);
-		if(this.licenseKey!=null){
-			String encoded = Base64.encode(this.licenseKey.getBytes("UTF-8"));
-			urlConn.setRequestProperty("Authorization", "Basic "+encoded);
-		}
-		
-		// send POST output
-		if (null != body) {
-			OutputStreamWriter printout = new OutputStreamWriter(urlConn.getOutputStream(), "UTF-8");
-			printout.write(body);
-			printout.flush();
-			printout.close();
-		}
-		
-		//close connection
-		urlConn.disconnect();
-		
-		// get response data
-		return IOUtils.toString(urlConn.getInputStream(), "UTF8");
-	}
 
 
 	//NOTE (rwesten): I rather do the error handling in the EnhancementEngine!
@@ -76,29 +78,35 @@ public class LanguageIdentifierClientHTT
 	    if(text == null || text.isEmpty()){ // no text
 	        return Collections.emptyList(); //no language
 	    }
-		List<GuessedLanguage> guesses = new Vector<GuessedLanguage>();
-
-		String txt = StringEscapeUtils.escapeXml(text);
-		String xmldata = "<soapenv:Envelope xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\" xmlns:lan=\"http://research.celi.it/LanguageIdentifierWS\"><soapenv:Header/><soapenv:Body>"
-				+"<lan:guessQueryLanguage><textToGuess>"+txt+"</textToGuess></lan:guessQueryLanguage></soapenv:Body></soapenv:Envelope>";
-		
-		
-		String responseXml = doPostRequest(this.serviceEP, xmldata);
-		log.debug(responseXml);
+        //create the POST request
+        HttpURLConnection con = Utils.createPostRequest(serviceEP, requestHeaders);
+        //write content
+        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(con.getOutputStream(),UTF8));
+        writer.write(SOAP_PREFIX);
+        writer.write("<lan:guessQueryLanguage><textToGuess>");
+        StringEscapeUtils.escapeXml(writer, text);
+        writer.write("</textToGuess></lan:guessQueryLanguage>");
+        writer.write(SOAP_SUFFIX);
+        writer.close();
+        //Call the service
+        long start = System.currentTimeMillis();
+        InputStream stream = con.getInputStream();
+        log.debug("Request to {} took {}ms",serviceEP,System.currentTimeMillis()-start);
+
+        // Create SoapMessage and parse the results
+        MessageFactory msgFactory = MessageFactory.newInstance();
+        SOAPMessage message = msgFactory.createMessage();
+        SOAPPart soapPart = message.getSOAPPart();
+
+        // Load the SOAP text into a stream source
+        StreamSource source = new StreamSource(stream);
 
-		// Create SoapMessage
-		MessageFactory msgFactory = MessageFactory.newInstance();
-		SOAPMessage message = msgFactory.createMessage();
-		SOAPPart soapPart = message.getSOAPPart();
-
-		// Load the SOAP text into a stream source
-		ByteArrayInputStream stream = new ByteArrayInputStream(responseXml.getBytes("UTF-8"));
-		StreamSource source = new StreamSource(stream);
+        // Set contents of message
+        soapPart.setContent(source);
 
-		// Set contents of message
-		soapPart.setContent(source);
+        SOAPBody soapBody = message.getSOAPBody();
 
-		SOAPBody soapBody = message.getSOAPBody();
+        List<GuessedLanguage> guesses = new Vector<GuessedLanguage>();
 		NodeList nlist = soapBody.getElementsByTagNameNS("*","return");
 		for (int i = 0; i < nlist.getLength(); i++) {
 			try {
@@ -118,31 +126,40 @@ public class LanguageIdentifierClientHTT
 	
     //NOTE (rwesten): I rather do the error handling in the EnhancementEngine!
 	public List<GuessedLanguage> guessLanguage(String text) throws IOException,SOAPException {
+       if(text == null || text.isEmpty()){
+            //no text -> no language
+            return Collections.emptyList();
+        }
+        //create the POST request
+        HttpURLConnection con = Utils.createPostRequest(serviceEP, requestHeaders);
+        //write content
+        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(con.getOutputStream(),UTF8));
+        writer.write(SOAP_PREFIX);
+        writer.write("<lan:guessLanguage><textToGuess>");
+        StringEscapeUtils.escapeXml(writer, text);
+        writer.write("</textToGuess></lan:guessLanguage>");
+        writer.write(SOAP_SUFFIX);
+        writer.close();
+        //Call the service
+        long start = System.currentTimeMillis();
+        InputStream stream = con.getInputStream();
+        log.debug("Request to {} took {}ms",serviceEP,System.currentTimeMillis()-start);
+
+        // Create SoapMessage and parse the results
+        MessageFactory msgFactory = MessageFactory.newInstance();
+        SOAPMessage message = msgFactory.createMessage();
+        SOAPPart soapPart = message.getSOAPPart();
+
+        // Load the SOAP text into a stream source
+        StreamSource source = new StreamSource(stream);
 
-		List<GuessedLanguage> guesses = new Vector<GuessedLanguage>();
-
-		String txt = StringEscapeUtils.escapeXml(text);
-		String xmldata = "<soapenv:Envelope xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\" xmlns:lan=\"http://research.celi.it/LanguageIdentifierWS\"><soapenv:Header/><soapenv:Body>"
-				+"<lan:guessLanguage><textToGuess>"+txt+"</textToGuess></lan:guessLanguage></soapenv:Body></soapenv:Envelope>";
-		//NOTE (rwesten) I think this should be also the #serviceEP
-		//URI uri = new URI("http://linguagrid.org/LSGrid/ws/language-identifier");
-		
-		String responseXml = doPostRequest(serviceEP, xmldata);
-		log.debug(responseXml);
+        // Set contents of message
+        soapPart.setContent(source);
 
-		// Create SoapMessage
-		MessageFactory msgFactory = MessageFactory.newInstance();
-		SOAPMessage message = msgFactory.createMessage();
-		SOAPPart soapPart = message.getSOAPPart();
-
-		// Load the SOAP text into a stream source
-		ByteArrayInputStream stream = new ByteArrayInputStream(responseXml.getBytes("UTF-8"));
-		StreamSource source = new StreamSource(stream);
+        SOAPBody soapBody = message.getSOAPBody();
 
-		// Set contents of message
-		soapPart.setContent(source);
+        List<GuessedLanguage> guesses = new Vector<GuessedLanguage>();
 
-		SOAPBody soapBody = message.getSOAPBody();
 		NodeList nlist = soapBody.getElementsByTagNameNS("*","return");
 		for (int i = 0; i < nlist.getLength(); i++) {
 			try {

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1344800&r1=1344799&r2=1344800&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties Thu May 31 16:25:27 2012
@@ -19,6 +19,15 @@
 # descriptions as used in the metatype.xml descriptor generated by the
 # maven SCR plugin
 
+stanbol.enhancer.engine.name.name=Name
+stanbol.enhancer.engine.name.description=The name of the enhancement engine as \
+used in the RESTful interface '/engine/<name>'
+
+service.ranking.name=Ranking
+service.ranking.description=If two enhancement engines with the same name are active the \
+one with the higher ranking will be used to process parsed content items.
+
+
 #===============================================================================
 #Properties and Options used to configure CELI enhancement 
 #===============================================================================
@@ -26,8 +35,8 @@
 #Key for the License key - used by all engines (see also CeliConstants interface)
 
 celi.license.name=License Key
-celi.license.description=The key needed to access the CELI Named Entity Recognizer \
-Web Service. This can be also set as OSGI framework or system property. An engine \
+celi.license.description=The "{user}:{password}" of the used CELI account. \
+This can be also set as OSGI framework or system property. An engine \
 specific configuration will override a framework/system wide configuration.
 
 #NER