You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2011/12/20 09:52:09 UTC

svn commit: r1221148 [2/2] - in /incubator/stanbol/trunk: ./ cmsadapter/cmis/src/main/java/org/apache/stanbol/cmsadapter/cmis/mapping/ cmsadapter/jcr/src/main/java/org/apache/stanbol/cmsadapter/jcr/mapping/ commons/web/base/src/test/java/org/apache/sta...

Modified: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/EngineException.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/EngineException.java?rev=1221148&r1=1221147&r2=1221148&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/EngineException.java (original)
+++ incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/EngineException.java Tue Dec 20 08:52:07 2011
@@ -48,7 +48,7 @@ public class EngineException extends Exc
     public EngineException(EnhancementEngine ee, ContentItem ci, String message, Throwable cause) {
         super(String.format(
                 "'%s' failed to process content item '%s' with type '%s': %s",
-                ee.getClass().getSimpleName(), ci.getId(), ci.getMimeType(),
+                ee.getClass().getSimpleName(), ci.getUri().getUnicodeString(), ci.getMimeType(),
                 message == null ? cause : message), cause);
     }
 }

Modified: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/InvalidContentException.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/InvalidContentException.java?rev=1221148&r1=1221147&r2=1221148&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/InvalidContentException.java (original)
+++ incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/InvalidContentException.java Tue Dec 20 08:52:07 2011
@@ -38,7 +38,7 @@ public class InvalidContentException ext
             Throwable cause) {
         super(String.format("'%s' failed to process invalid content item '%s'"
                 + " with type '%s': %s", ee.getClass().getSimpleName(),
-                ci.getId(), ci.getMimeType(), cause.getMessage()), cause);
+                ci.getUri().getUnicodeString(), ci.getMimeType(), cause.getMessage()), cause);
     }
 
 }

Added: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/NoSuchPartException.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/NoSuchPartException.java?rev=1221148&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/NoSuchPartException.java (added)
+++ incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/NoSuchPartException.java Tue Dec 20 08:52:07 2011
@@ -0,0 +1,23 @@
+package org.apache.stanbol.enhancer.servicesapi;
+
+import org.apache.clerezza.rdf.core.UriRef;
+
+/**
+ * Indicates that a Content Item does not have the requested part, regardless
+ * of whether the part was requested by its index or by its URI.
+ */
+public class NoSuchPartException extends RuntimeException {
+
+    private static final long serialVersionUID = 1L;
+
+    public NoSuchPartException(int index) { // part requested by its position
+        super("The Content Item has no part with index "+index);
+    }
+    public NoSuchPartException(UriRef partUri) { // part requested by its URI
+        super("The Content Item has no part with URI "+partUri);
+    }
+    public NoSuchPartException(String message) { // caller supplied description
+        super(message);
+    }
+
+}

Propchange: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/NoSuchPartException.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/ContentItemHelper.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/ContentItemHelper.java?rev=1221148&r1=1221147&r2=1221148&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/ContentItemHelper.java (original)
+++ incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/ContentItemHelper.java Tue Dec 20 08:52:07 2011
@@ -24,8 +24,13 @@ import java.io.UnsupportedEncodingExcept
 import java.net.URLEncoder;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.StringTokenizer;
 
 import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.commons.io.IOUtils;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 
 
@@ -53,7 +58,7 @@ public class ContentItemHelper {
      * it.
      */
     public static UriRef ensureUri(ContentItem ci) {
-        String uri = ci.getId();
+        String uri = ci.getUri().getUnicodeString();
         if (!uri.startsWith("http://") && !uri.startsWith("urn:")) {
             uri = "urn:" + urlEncode(uri);
         }
@@ -125,23 +130,88 @@ public class ContentItemHelper {
         return buf.toString();
     }
 
-    public static UriRef makeDefaultUrn(byte[] content) {
-        return makeDefaultUri("urn:content-item-", content);
+    public static UriRef makeDefaultUrn(Blob blob) {
+        return makeDefaultUri("urn:content-item-", blob.getStream());
     }
-
-    public static UriRef makeDefaultUri(String baseUri, byte[] content) {
+    public static UriRef makeDefaultUrn(InputStream in) {
+        return makeDefaultUri("urn:content-item-", in);
+    }
+    public static UriRef makeDefaultUrn(byte[] data){
+        return makeDefaultUri("urn:content-item-", new ByteArrayInputStream(data));
+    }
+    public static UriRef makeDefaultUri(String baseUri, Blob blob) {
+        return makeDefaultUri(baseUri, blob.getStream());
+    }
+    public static UriRef makeDefaultUri(String baseUri, byte[] data) {
+        return makeDefaultUri(baseUri, new ByteArrayInputStream(data));
+    }
+    public static UriRef makeDefaultUri(String baseUri, InputStream in) { // NOTE: reads 'in' and always closes it
         // calculate an ID based on the digest of the content
         if (!baseUri.startsWith("urn:") && !baseUri.endsWith("/")) {
             baseUri += "/";
         }
-        String hexDigest = "";
+        String hexDigest;
         try {
-            hexDigest = streamDigest(new ByteArrayInputStream(content), null, SHA1);
+            hexDigest = streamDigest(in, null, SHA1);
         } catch (IOException e) {
-            // this is not going to happen since output stream is null and the
-            // input data is already loaded in memory
+            throw new IllegalStateException("Unable to read content for calculating " +
+                    "the hexDigest of the parsed content as used for the default URI of a ContentItem!",e);
-        }
+        } finally {
+            IOUtils.closeQuietly(in); // release the stream on success AND when reading failed
+        }
         return new UriRef(baseUri + SHA1.toLowerCase() + "-" + hexDigest);
     }
-
+    /**
+     * Parses and validates the mime-type and its parameters from the parsed
+     * media type string as defined in
+     * <a href="http://www.ietf.org/rfc/rfc2046.txt">rfc2046</a>.
+     * <p>
+     * The mime-type is stored as value for the <code>null</code> key. Parameter
+     * keys are trimmed and converted to lower case, values are trimmed. Parameters
+     * with an empty key or value are ignored; the first occurrence of a key wins.
+     * @param mimeTypeString the media type formatted as defined by rfc2046
+     * @return A map containing the mime-type under the <code>null</code> key and
+     * all parameters under lower case keys.
+     * @throws IllegalArgumentException if the parsed mimeTypeString is
+     * <code>null</code>, empty or the parsed mime-type is empty, does not define
+     * non empty '{type}/{sub-type}' or uses a wildcard for the type or sub-type.
+     */
+    public static Map<String,String> parseMimeType(String mimeTypeString){
+        if(mimeTypeString == null || mimeTypeString.isEmpty()){
+            throw new IllegalArgumentException("The parsed mime-type MUST NOT be NULL nor empty!");
+        }
+        Map<String,String> parsed = new HashMap<String,String>();
+        StringTokenizer tokens = new StringTokenizer(mimeTypeString, ";");
+        //StringTokenizer skips empty tokens: an input like ";" yields no token at all
+        String mimeType = tokens.hasMoreTokens() ? tokens.nextToken().trim() : "";
+        if(mimeType.isEmpty()){
+            throw new IllegalArgumentException("Parsed mime-type MUST NOT be empty " +
+                    "(mimeTypeString='"+mimeTypeString+"')!");
+        }
+        if(mimeType.indexOf('*')>=0){
+            throw new IllegalArgumentException("Parsed mime-type MUST NOT use " +
+                    "Wildcards (mimeType='"+mimeType+"')!");
+        }
+        //limit -1 keeps trailing empty strings, so that "text/plain/" is rejected
+        String[] typeSubType = mimeType.split("/", -1);
+        if(typeSubType.length != 2 || typeSubType[0].isEmpty() || typeSubType[1].isEmpty()) {
+            throw new IllegalArgumentException("Parsed mime-type MUST define '{type}/{sub-type}' " +
+                    "and both MUST NOT be empty (mimeType='"+mimeType+"')!");
+        }
+        parsed.put(null, mimeType);
+        while(tokens.hasMoreTokens()){ //parse the parameters (if any)
+            String parameter = tokens.nextToken();
+            int nameValueSeparator = parameter.indexOf('=');
+            if(nameValueSeparator > 0){ //ignore tokens without a name or without '='
+                //keys are case insensitive (we use locale independent lower case)
+                String key = parameter.substring(0,nameValueSeparator).trim()
+                        .toLowerCase(java.util.Locale.ROOT);
+                String value = parameter.substring(nameValueSeparator+1).trim();
+                if(!key.isEmpty() && !value.isEmpty() && !parsed.containsKey(key)){
+                    parsed.put(key,value); //do not override existing keys
+                }
+            }
+        }
+        return parsed;
+    }
 }

Added: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/ContentItemImpl.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/ContentItemImpl.java?rev=1221148&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/ContentItemImpl.java (added)
+++ incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/ContentItemImpl.java Tue Dec 20 08:52:07 2011
@@ -0,0 +1,204 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.servicesapi.helper;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.NoSuchPartException;
+
+
+/** 
+ * A generic ContentItem implementation that takes the uri, main content part
+ * and the graph used to store the metadata as parameter.
+ * <p>
+ * This content item initially consists of a single blob. 
+ * Subclasses don't have to care about multi-parts aspects of content item. 
+ * By inheriting from this class the ability for clients to add additional parts 
+ * is ensured. 
+ * <p>
+ * Even though this class does implement the full {@link ContentItem} interface
+ * it is marked as abstract and has only a protected constructor because it is
+ * not intended that users directly instantiate it. The intended usage is to
+ * create subclasses that instantiate ContentItems with specific combinations
+ * of {@link Blob} and {@link MGraph} implementations.<p>
+ * Examples are: <ul>
+ * <li>The {@link InMemoryContentItem} intended for in-memory
+ * storage of ContentItems during the stateless enhancement workflow
+ * <li> The {@link WebContentItem} that allows to create a ContentItem from an 
+ * URI.
+ * </ul>
+ * TODO (rwesten): check if we want this to be an abstract class or if there are
+ * reasons to have a general purpose ContentItem implementation
+ */
+public abstract class ContentItemImpl implements ContentItem {
+    
+    protected static final String MAIN_BLOB_SUFFIX = "_main";
+
+    /**
+     * Holds the content parts of this ContentItem
+     */
+	private final LinkedHashMap<UriRef, Object> parts = new LinkedHashMap<UriRef, Object>();
+	/**
+	 * The uri of the ContentItem
+	 */
+	private final UriRef uri;
+	/**
+	 * The uri of the main content part (the {@link Blob} parsed with the constructor)
+	 */
+	private final UriRef mainBlobUri;
+
+    private final MGraph metadata; 
+	
+	protected ContentItemImpl(UriRef uri, Blob main, MGraph metadata) {
+	    if(uri == null){
+	        throw new IllegalArgumentException("The URI for the ContentItem MUST NOT be NULL!");
+	    }
+	    if(main == null){
+	        throw new IllegalArgumentException("The main Blob MUST NOT be NULL!");
+	    }
+	    if(metadata == null){
+	        throw new IllegalArgumentException("The parsed graph MUST NOT be NULL!");
+	    }
+	    this.uri = uri;
+	    this.mainBlobUri = new UriRef(uri.getUnicodeString()+MAIN_BLOB_SUFFIX);
+	    //The main Blob is parsed to the constructor (instead of being fetched
+	    //via a public getter) so that no public method is called on a maybe
+	    //not fully initialised instance. It is registered as the first part.
+	    this.parts.put(mainBlobUri, main);
+	    this.metadata = metadata;
+	}
+	
+	/**
+	 * Final getter retrieving the Blob via {@link #getPart(UriRef, Class)}
+	 * with <code>{@link #getUri()}+{@link #MAIN_BLOB_SUFFIX}</code>
+	 */
+	@Override
+	public final Blob getBlob() {
+	    return (Blob) parts.get(mainBlobUri);
+	}
+	@Override
+	public final InputStream getStream() {
+	    return getBlob().getStream();
+	}
+    @Override
+    public final String getMimeType() {
+        return getBlob().getMimeType();
+    }
+	
+    @SuppressWarnings("unchecked")
+	@Override
+	public <T> T getPart(UriRef uri, Class<T> clazz) throws NoSuchPartException {
+        if(parts.containsKey(uri)){ //look up the part registered under this URI
+            return (T) parts.get(uri); //NOTE(review): unchecked cast - clazz is never compared with the stored part
+        } else {
+		    throw new NoSuchPartException(uri); //no part is registered for the parsed URI
+		}
+	}
+
+	@Override
+	public UriRef getPartUri(int index) throws NoSuchPartException {
+		int count = 0;
+		for(Map.Entry<UriRef, Object> entry : parts.entrySet()) {
+			if (count == index) {
+				return entry.getKey();
+			}
+			count++;
+		}
+		throw new NoSuchPartException(index);
+	}
+	
+	/** Returns the part at the parsed position (iteration order of the parts map) if it is an instance of clazz. */
+	@Override
+	public <T> T getPart(int index, Class<T> clazz) throws NoSuchPartException {
+		int count = 0;
+		for(Map.Entry<UriRef, Object> entry : parts.entrySet()) {
+			if (count == index) {
+				Object result = entry.getValue();
+				//the requested type may be the part's own class, a super class or an interface of it
+				if (!clazz.isInstance(result)) {
+					throw new NoSuchPartException("The body part "+index+" is of type "+result.getClass().getName()+" which cannot be converted to "+clazz.getName());
+				}
+				return clazz.cast(result); //checked cast, no unchecked warning needed
+			}
+			count++;
+		}
+		throw new NoSuchPartException(index); //index is negative or >= the number of parts
+	}
+	
+	@Override
+	public Object addPart(UriRef uriRef, Object object) {
+	    if(uriRef == null || object == null){
+	        throw new IllegalArgumentException("The parsed content part ID and " +
+	        		"object MUST NOT be NULL!");
+	    }
+	    if(uriRef.equals(mainBlobUri)){ //avoid that this method is used to
+	        //reset the main content part
+	        throw new IllegalArgumentException("The parsed content part ID MUST " +
+	        		"NOT be equals to the ID used by the main Content Part " +
+	        		"( ContentItem.getUri()+\"_main\")");
+	    }
+		return parts.put(uriRef, object);
+		
+	}
+
+    @Override
+	public UriRef getUri() {
+		return uri;
+	}
+
+	@Override
+	public MGraph getMetadata() {
+	    return metadata;
+	}
+	@Override
+	public int hashCode() {
+	    return uri.hashCode();
+	}
+	@Override
+	public boolean equals(Object o) {
+	    //TODO: is it OK to check only for the uri? An implementation that takes
+	    //      the uri, metadata and all content parts into account might be
+	    //      to expensive for most common use cases.
+	    return o instanceof ContentItem && //check type
+	            ((ContentItem)o).getUri().equals(uri);
+	}
+    @Override
+    public String toString() {
+        return String.format("%s uri=[%s], content=[%s;mime-type:%s%s], metadata=[%s triples], " +
+        		"parts=%s", 
+            getClass().getSimpleName(), //the implementation
+            getUri().getUnicodeString(), //the URI
+            //the size in Bytes (if available)
+            getBlob().getContentLength()>=0 ?("size:"+getBlob().getContentLength()+" bytes;") : "",
+            getBlob().getMimeType(), //the mime-type
+            //and parameter (if available)
+            getBlob().getParameter().isEmpty() ? "" : (";parameter:"+getBlob().getParameter()),
+            getMetadata().size(), //the number of triples
+            parts.keySet()); //and the part URIs
+    }
+
+}

Propchange: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/ContentItemImpl.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java?rev=1221148&r1=1221147&r2=1221148&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java (original)
+++ incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java Tue Dec 20 08:52:07 2011
@@ -61,7 +61,7 @@ public class EnhancementEngineHelper {
      */
     public static UriRef createTextEnhancement(ContentItem ci,
             EnhancementEngine engine){
-        return createTextEnhancement(ci.getMetadata(), engine, new UriRef(ci.getId()));
+        return createTextEnhancement(ci.getMetadata(), engine, new UriRef(ci.getUri().getUnicodeString()));
     }
     /**
      * Create a new instance with the types enhancer:Enhancement and
@@ -95,7 +95,7 @@ public class EnhancementEngineHelper {
      */
     public static UriRef createEntityEnhancement(ContentItem ci,
             EnhancementEngine engine){
-        return createEntityEnhancement(ci.getMetadata(), engine, new UriRef(ci.getId()));
+        return createEntityEnhancement(ci.getMetadata(), engine, new UriRef(ci.getUri().getUnicodeString()));
     }
     /**
      * Create a new instance with the types enhancer:Enhancement and
@@ -183,7 +183,7 @@ public class EnhancementEngineHelper {
 
         // relate the extraction to the content item
         metadata.add(new TripleImpl(extraction,
-                Properties.ENHANCER_RELATED_CONTENT_ITEM, new UriRef(ci.getId())));
+                Properties.ENHANCER_RELATED_CONTENT_ITEM, new UriRef(ci.getUri().getUnicodeString())));
 
         // creation date
         metadata.add(new TripleImpl(extraction, Properties.DC_CREATED,

Added: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/InMemoryBlob.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/InMemoryBlob.java?rev=1221148&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/InMemoryBlob.java (added)
+++ incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/InMemoryBlob.java Tue Dec 20 08:52:07 2011
@@ -0,0 +1,125 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.servicesapi.helper;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+
+import org.apache.commons.io.IOUtils;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+
+/**
+ * Holds the parsed data in an byte array. Parsed byte[] are NOT copied,
+ * Strings are encoded as UTF-8 and {@link InputStream} are copied by using 
+ * {@link IOUtils#toByteArray(InputStream)}.<p>
+ * The default mime-types (if <code>null</code> is parsed as mimeType) are for
+ * Strings "text/plain" and in all other cases "application/octet-stream".
+ */
+public class InMemoryBlob implements Blob {
+    private static final Charset UTF8 = Charset.forName("utf-8");
+    public static final String DEFAULT_TEXT_MIMETYPE = "text/plain";
+    public static final String DEFAULT_BINARY_MIMETYPE = "application/octet-stream";
+
+    protected final String mimeType;
+    protected final Map<String,String> parameters;
+    
+    private byte[] data;
+	/**
+	 * Creates an {@link InMemoryBlob} for the parsed String. If a "charset"
+	 * parameter is present for the parsed mimeType it is replaced with "UTF-8",
+	 * the charset used to encode the String as byte[].
+	 * @param text the text
+	 * @param mimeType the mimeType. If <code>null</code> "text/plain" is used
+	 * as default
+	 */
+	public InMemoryBlob(String text, String mimeType){
+	    this(text.getBytes(UTF8),mimeType != null ? mimeType : DEFAULT_TEXT_MIMETYPE,
+	            Collections.singletonMap("charset", UTF8.name()));
+	}
+	/**
+	 * Creates an instance for the parsed {@link InputStream}. Data are copied
+	 * to a byte array. The parsed stream is closed after copying the data.
+	 * @param in the {@link InputStream}. MUST NOT be <code>null</code>
+	 * @param mimeType the mime-type. If <code>null</code>  "application/octet-stream"
+	 * is used as default.
+	 * @throws IOException indicates an error while reading from the parsed stream
+	 */
+	public InMemoryBlob(InputStream in,String mimeType) throws IOException {
+	    this(IOUtils.toByteArray(in),mimeType);
+	    IOUtils.closeQuietly(in);
+	}
+	/**
+	 * Creates an instance for the parsed byte array. The array is NOT copied
+	 * therefore changes within that array will be reflected to components
+	 * reading the data from this Blob.
+	 * @param data the data. MUST NOT be <code>null</code>
+	 * @param mimeType the mime-type. If <code>null</code>  "application/octet-stream"
+     * is used as default.
+	 */
+	public InMemoryBlob(byte[] data, String mimeType) {
+	    this(data,mimeType,null);
+	}
+	/**
+	 * Internally used constructor that allows to parse additional parameters as
+	 * required to ensure setting the 'charset' in case initialisation was done
+	 * by parsing a string
+	 * @param data
+	 * @param mimeType
+	 * @param parsedParameters
+	 */
+    protected InMemoryBlob(byte[] data, String mimeType,Map<String,String> parsedParameters) {
+        if(data == null){
+            throw new IllegalArgumentException("The parsed content MUST NOT be NULL!");
+        }
+        this.data = data;
+        Map<String,String> parameters;
+	    if(mimeType == null){
+	        this.mimeType = DEFAULT_BINARY_MIMETYPE;
+	        parameters = new HashMap<String,String>();
+	    } else {
+	        parameters = ContentItemHelper.parseMimeType(mimeType);
+	        this.mimeType = parameters.remove(null);
+	    }
+	    if(parsedParameters != null){
+	        parameters.putAll(parsedParameters);
+	    }
+	    this.parameters = Collections.unmodifiableMap(parameters);
+	}
+
+	@Override
+	public final InputStream getStream() {
+		return new ByteArrayInputStream(data);
+	}
+	@Override
+	public final long getContentLength() {
+	    return data.length;
+	}
+    @Override
+    public final String getMimeType() {
+        return mimeType;
+    }
+    @Override
+    public final Map<String,String> getParameter() {
+        return parameters;
+    }
+}

Propchange: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/InMemoryBlob.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/InMemoryContentItem.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/InMemoryContentItem.java?rev=1221148&r1=1221147&r2=1221148&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/InMemoryContentItem.java (original)
+++ incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/InMemoryContentItem.java Tue Dec 20 08:52:07 2011
@@ -16,94 +16,62 @@
 */
 package org.apache.stanbol.enhancer.servicesapi.helper;
 
-import java.io.ByteArrayInputStream;
-import java.io.InputStream;
 
 import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
-import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
 
 
 /**
- * Base content item implementation that holds a complete copy of the data in
- * memory.
+ * ContentItem implementation that holds a complete copy of the data in
+ * memory. Internally it uses {@link InMemoryBlob} to store the content and
+ * an {@link SimpleMGraph} for the metadata.
  * <p>
  * This implementation can be used independently of any store implementation and
  * is suitable for stateless processing.
  */
-public class InMemoryContentItem implements ContentItem {
-    // private final Logger log = LoggerFactory.getLogger(getClass());
+public class InMemoryContentItem extends ContentItemImpl {
 
-    private final MGraph metadata;
+//Do not allow to create a ContentItem without a content
+//    public InMemoryContentItem(String id) {
+//        this(id, null, null, null);
+//    }
 
-    private final String id;
-
-    private final String mimeType;
-
-    private final byte[] data;
-
-    public InMemoryContentItem(String id) {
-        this(id, null, null, null);
+    public InMemoryContentItem(byte[] content, String mimeType) {
+        this((UriRef)null,new InMemoryBlob(content, mimeType),null);
     }
+    
+    public InMemoryContentItem(String id, String content, String mimeType) {
+		this(id, new InMemoryBlob(content, mimeType),null);
+	}
 
-    public InMemoryContentItem(byte[] content, String mimetype) {
-        this(null, content, mimetype, null);
+    public InMemoryContentItem(String id, byte[] content, String mimetype) {
+        this(id,new InMemoryBlob(content, mimetype),null);
     }
 
-    public InMemoryContentItem(String id, byte[] content, String mimeType) {
-        this(id, content, mimeType, null);
-    }
-
-    public InMemoryContentItem(String id, byte[] content, String mimeType,
+    public InMemoryContentItem(String uriString, byte[] content, String mimeType,
             MGraph metadata) {
-        if (id == null) {
-            id = ContentItemHelper.makeDefaultUrn(content).getUnicodeString();
-        }
-
-        if (metadata == null) {
-            metadata = new SimpleMGraph();
-        }
-        if (mimeType == null) {
-            mimeType = "application/octet-stream";
-        } else {
-            // Keep only first part of content-types like text/plain ; charset=UTF-8
-            mimeType = mimeType.split(";")[0].trim();
-        }
-        if (content == null) {
-            content = new byte[0];
-        }
-
-        this.id = id;
-        this.data = content;
-        this.mimeType = mimeType;
-        this.metadata = metadata;
-    }
-
-    protected static final InMemoryContentItem fromString(String content) {
-        return new InMemoryContentItem(content.getBytes(), "text/plain");
+    	this(uriString != null? new UriRef(uriString) : null ,
+    	        new InMemoryBlob(content, mimeType),
+    	        metadata);
     }
-
-    @Override
-    public String toString() {
-        return getClass().getSimpleName() + " id=[" + id + "], mimeType[="
-                + mimeType + "], data=[" + data.length + "] bytes"
-                + ", metadata=" + metadata;
+    public InMemoryContentItem(UriRef uriRef, String content, String mimeType) {
+		this(uriRef, new InMemoryBlob(content, mimeType), null);
+	}
+    public InMemoryContentItem(UriRef uri, byte[] content, String mimeType, MGraph metadata) {
+        this(uri, new InMemoryBlob(content, mimeType),metadata);
     }
-
-    public String getId() {
-        return id;
+    protected InMemoryContentItem(String uriString, Blob blob, MGraph metadata) {
+        this(uriString != null ? new UriRef(uriString) : null, blob, metadata);
     }
-
-    public MGraph getMetadata() {
-        return metadata;
+    protected InMemoryContentItem(UriRef uri, Blob blob, MGraph metadata) {
+        super(uri == null ? ContentItemHelper.makeDefaultUrn(blob): uri,blob,
+                metadata == null ? new SimpleMGraph() : metadata);
     }
 
-    public String getMimeType() {
-        return mimeType;
-    }
-
-    public InputStream getStream() {
-        return new ByteArrayInputStream(data);
+	protected static final InMemoryContentItem fromString(String content) {
+        return new InMemoryContentItem(content.getBytes(), "text/plain");
     }
 
 }

Modified: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/WebContentItem.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/WebContentItem.java?rev=1221148&r1=1221147&r2=1221148&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/WebContentItem.java (original)
+++ incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/WebContentItem.java Tue Dec 20 08:52:07 2011
@@ -16,88 +16,114 @@
 */
 package org.apache.stanbol.enhancer.servicesapi.helper;
 
-import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.HttpURLConnection;
 import java.net.URL;
 import java.net.URLConnection;
+import java.util.Map;
 
 import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
-import org.apache.commons.io.IOUtils;
-import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
 
 /**
- * A ContentItem retrieving its content and MediaType by dereferencing a given URI.
+ * A ContentItem retrieving its content and MediaType by dereferencing a given 
+ * URI. After the content is loaded from the remote server it is cached 
+ * {@link InMemoryBlob in-memory}.
  * 
  * After construction the <code>metadata</code> graph is empty.
  *
  */
 /*
- * The current implementation keeps the content in memory after the firts connection 
+ * The current implementation keeps the content in memory after the first connection 
  * to the remote server. 
  */
-public class WebContentItem implements ContentItem {
-	
-	private final MGraph metadata = new SimpleMGraph();
-	private final URL url;
-	private boolean dereferenced = false;
-	private byte[] data;
-	private String mimeType;
+public class WebContentItem extends ContentItemImpl {
 	
 	/**
-	 * Creates an instance for a given URL
+	 * Creates an instance for a given URL and uses a {@link SimpleMGraph} to
+	 * store metadata in memory.
 	 * 
 	 * @param url the dereferenceable URI
 	 */
-	public WebContentItem(URL url) {
-		this.url = url;
-	}
-
-	@Override
-	public String getId() {
-		return url.toString();
-	}
-
-	@Override
-	public InputStream getStream() {
-		if (!dereferenced) {
-			dereference();
-		}
-		return new ByteArrayInputStream(data);
-	}
-
-	@Override
-	public String getMimeType() {
-		if (!dereferenced) {
-			dereference();
-		}
-		return mimeType;
-	}
-
-	@Override
-	public MGraph getMetadata() {
-		return metadata;
+    public WebContentItem(URL url) {
+        this(url,null);
+    }
+    /**
+     * Creates an instance for a given URL and an existing {@link MGraph} to
+     * store the metadata.
+     * @param url the dereferenceable URI
+     * @param metadata the {@link MGraph} to store the metadata
+     */
+	public WebContentItem(URL url, MGraph metadata) {
+		super(new UriRef(url.toString()), new UrlBlob(url),
+		    metadata == null ? new SimpleMGraph() : metadata);
 	}
 	
-	private synchronized void dereference() {
-		//checking again in the synchronized section
-		if (!dereferenced) {
-			URLConnection uc;
-			try {
-				uc = url.openConnection();
-				data = IOUtils.toByteArray(uc.getInputStream());
-	            mimeType = uc.getContentType();
-	            if (mimeType == null) {
-	                mimeType = "application/octet-stream";
-	            } else {
-	                // Keep only first part of content-types like text/plain ; charset=UTF-8
-	                mimeType = mimeType.split(";")[0].trim();
-	            }
-	            dereferenced = true;
-			} catch (IOException e) {
-				throw new RuntimeException("Exception derefereing URI "+url, e);
-			}
-		}
+	/**
+	 * Blob implementation that dereferences the passed URL on the first
+	 * access to the Blob. The downloaded content is stored within an
+	 * {@link InMemoryBlob}. The <code>dereferenced</code> field is volatile so
+	 * that the unsynchronized null checks in the getters see the loaded Blob.
+	 */
+	private static class UrlBlob implements Blob {
+
+	    private volatile Blob dereferenced;
+        private final URL url;
+        protected UrlBlob(URL url){
+            this.url = url;
+        }
+	    
+        @Override
+        public String getMimeType() {
+            if(dereferenced == null){
+                dereference();
+            }
+            return dereferenced.getMimeType();
+        }
+
+        @Override
+        public InputStream getStream() {
+            if(dereferenced == null){
+                dereference();
+            }
+            return dereferenced.getStream();
+        }
+
+        @Override
+        public Map<String,String> getParameter() {
+            if(dereferenced == null){
+                dereference();
+            }
+            return dereferenced.getParameter();
+        }
+
+        @Override
+        public long getContentLength() {
+            if(dereferenced == null){
+                dereference();
+            }
+            return dereferenced.getContentLength();
+        }
+        
+        private synchronized void dereference() {
+            //checking again in the synchronized section
+            if (dereferenced == null) {
+                URLConnection uc;
+                try {
+                    uc = url.openConnection();
+                    InputStream in = uc.getInputStream();
+                    String mimeType = uc.getContentType();
+                    if (mimeType == null) {
+                        mimeType = "application/octet-stream";
+                    }
+                    dereferenced = new InMemoryBlob(in, mimeType);
+                } catch (IOException e) {
+                    throw new RuntimeException("Exception derefereing URI "+url, e);
+                }
+            }	 
+        }
 	}
 }

Added: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/AbstractBlobTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/AbstractBlobTest.java?rev=1221148&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/AbstractBlobTest.java (added)
+++ incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/AbstractBlobTest.java Tue Dec 20 08:52:07 2011
@@ -0,0 +1,150 @@
+package org.apache.stanbol.enhancer.serviceapi.helper;
+
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Test class intended to be extended by UnitTest classes for specific {@link Blob}
+ * implementations. This class tests if parsed mime-types are handled correctly.
+ * It does not test the actual handling of the data, because this is considered
+ * specific for each Blob implementation.<p>
+ * The {@link #getBlobToTestMimetypeHandling(String)} MUST BE implemented to use
+ * the generic unit tests defined by this class.<p>
+ * <b>NOTE:</b> {@link Blob} implementations can use the 
+ * {@link ContentItemHelper#parseMimeType(String)} method for parsing 
+ * mime-type strings.
+ * @see InMemoryBlobTest
+ */
+public abstract class AbstractBlobTest {
+
+    /**
+     * Getter used to get the Blob to test mime-type handling. The content is
+     * not used for such tests and may be set to anything.
+     * @param mimeType the mimetype
+     * @return the Blob used to test the mime-type handling
+     */
+    protected abstract Blob getBlobToTestMimetypeHandling(String mimeType);
+
+    @Test
+    public void testNullWildCard(){
+        Blob blob;
+        try {
+            blob = getBlobToTestMimetypeHandling(null);
+        } catch (IllegalArgumentException e) {
+            //if no detection of the mimeType is supported this is expected
+            return;
+        }
+        //if autodetection is supported, check that the mimetype is not null
+        Assert.assertNotNull(blob.getMimeType());
+        Assert.assertFalse(blob.getMimeType().isEmpty());
+    }
+    @Test(expected=IllegalArgumentException.class)
+    public void testEmptyMimeType(){
+        getBlobToTestMimetypeHandling("");
+    }
+    
+    @Test(expected=IllegalArgumentException.class)
+    public void testWildcardType(){
+        getBlobToTestMimetypeHandling("*/*;charset=UTF-8");
+    }
+    @Test(expected=IllegalArgumentException.class)
+    public void testWildcardSubType(){
+        getBlobToTestMimetypeHandling("text/*;charset=UTF-8");
+    }
+    @Test(expected=IllegalArgumentException.class)
+    public void testEmptyMimetype(){
+        getBlobToTestMimetypeHandling(";charset=UTF-8");
+    }
+    @Test(expected=IllegalArgumentException.class)
+    public void testMissingSubType(){
+        getBlobToTestMimetypeHandling("text;charset=UTF-8");
+    }
+    @Test(expected=IllegalArgumentException.class)
+    public void testEmptyType(){
+        getBlobToTestMimetypeHandling("/plain;charset=UTF-8");
+    }
+    @Test(expected=IllegalArgumentException.class)
+    public void testEmptySubType(){
+        getBlobToTestMimetypeHandling("text/;charset=UTF-8");
+    }
+    
+    @Test
+    public void testMimeType(){
+        Blob blob = getBlobToTestMimetypeHandling("text/plain;charset=UTF-8");
+        Assert.assertEquals("text/plain", blob.getMimeType());
+        Assert.assertTrue(blob.getParameter().containsKey("charset"));
+        Assert.assertEquals("UTF-8", blob.getParameter().get("charset"));
+        
+        blob = getBlobToTestMimetypeHandling("text/plain;charset=UTF-8;other=test");
+        Assert.assertEquals("text/plain", blob.getMimeType());
+        Assert.assertTrue(blob.getParameter().containsKey("charset"));
+        Assert.assertEquals("UTF-8", blob.getParameter().get("charset"));
+        Assert.assertTrue(blob.getParameter().containsKey("other"));
+        Assert.assertEquals("test", blob.getParameter().get("other"));
+    }
+    @Test
+    public void testMultipleSeparators(){
+        Blob blob = getBlobToTestMimetypeHandling("text/plain;;charset=UTF-8");
+        Assert.assertEquals("text/plain", blob.getMimeType());
+        Assert.assertTrue(blob.getParameter().containsKey("charset"));
+        Assert.assertEquals("UTF-8", blob.getParameter().get("charset"));
+        
+        blob = getBlobToTestMimetypeHandling("text/plain;charset=UTF-8;;other=test");
+        Assert.assertEquals("text/plain", blob.getMimeType());
+        Assert.assertTrue(blob.getParameter().containsKey("charset"));
+        Assert.assertEquals("UTF-8", blob.getParameter().get("charset"));
+        Assert.assertTrue(blob.getParameter().containsKey("other"));
+        Assert.assertEquals("test", blob.getParameter().get("other"));
+    }
+    @Test
+    public void testIllegalFormatedParameter(){
+        Blob blob = getBlobToTestMimetypeHandling("text/plain;=UTF-8");
+        Assert.assertEquals("text/plain", blob.getMimeType());
+        Assert.assertTrue(blob.getParameter().isEmpty());
+        
+        blob = getBlobToTestMimetypeHandling("text/plain;charset=UTF-8;=illegal");
+        Assert.assertEquals("text/plain", blob.getMimeType());
+        Assert.assertEquals(blob.getParameter().size(),1);
+        Assert.assertTrue(blob.getParameter().containsKey("charset"));
+        Assert.assertEquals("UTF-8", blob.getParameter().get("charset"));
+
+        blob = getBlobToTestMimetypeHandling("text/plain;=illegal;charset=UTF-8");
+        Assert.assertEquals("text/plain", blob.getMimeType());
+        Assert.assertEquals(blob.getParameter().size(),1);
+        Assert.assertTrue(blob.getParameter().containsKey("charset"));
+        Assert.assertEquals("UTF-8", blob.getParameter().get("charset"));
+
+        blob = getBlobToTestMimetypeHandling("text/plain;charset=");
+        Assert.assertEquals("text/plain", blob.getMimeType());
+        Assert.assertTrue(blob.getParameter().isEmpty());
+        blob = getBlobToTestMimetypeHandling("text/plain;charset");
+        Assert.assertEquals("text/plain", blob.getMimeType());
+        Assert.assertTrue(blob.getParameter().isEmpty());
+        
+        blob = getBlobToTestMimetypeHandling("text/plain;charset=UTF-8;test=");
+        Assert.assertEquals("text/plain", blob.getMimeType());
+        Assert.assertEquals(blob.getParameter().size(),1);
+        Assert.assertTrue(blob.getParameter().containsKey("charset"));
+        Assert.assertEquals("UTF-8", blob.getParameter().get("charset"));
+
+        blob = getBlobToTestMimetypeHandling("text/plain;charset=UTF-8;test");
+        Assert.assertEquals("text/plain", blob.getMimeType());
+        Assert.assertEquals(blob.getParameter().size(),1);
+        Assert.assertTrue(blob.getParameter().containsKey("charset"));
+        Assert.assertEquals("UTF-8", blob.getParameter().get("charset"));
+    
+        blob = getBlobToTestMimetypeHandling("text/plain;test;charset=UTF-8;");
+        Assert.assertEquals("text/plain", blob.getMimeType());
+        Assert.assertEquals(blob.getParameter().size(),1);
+        Assert.assertTrue(blob.getParameter().containsKey("charset"));
+        Assert.assertEquals("UTF-8", blob.getParameter().get("charset"));
+    }
+    @Test(expected=UnsupportedOperationException.class)
+    public void testReadOnlyParameter(){
+        Blob blob = getBlobToTestMimetypeHandling("text/plain;test;charset=UTF-8");
+        blob.getParameter().put("test", "dummy");
+    }
+
+}

Propchange: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/AbstractBlobTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/ContentItemTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/ContentItemTest.java?rev=1221148&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/ContentItemTest.java (added)
+++ incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/ContentItemTest.java Tue Dec 20 08:52:07 2011
@@ -0,0 +1,100 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.serviceapi.helper;
+
+import java.util.Date;
+
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.NoSuchPartException;
+import org.apache.stanbol.enhancer.servicesapi.helper.InMemoryBlob;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemImpl;
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ContentItemTest {
+    
+    private final Logger log = LoggerFactory.getLogger(ContentItemTest.class);
+    private static final UriRef ciUri = new UriRef("http://example.org/");
+    private static final Blob blob = new InMemoryBlob("hello", null);
+
+    @Test(expected=IllegalArgumentException.class)
+    public void missingUri(){
+        new ContentItemImpl(null,blob,new SimpleMGraph()){};
+    }
+    @Test(expected=IllegalArgumentException.class)
+    public void missingBlob(){
+        new ContentItemImpl(ciUri,null,new SimpleMGraph()){};
+    }
+    @Test(expected=IllegalArgumentException.class)
+    public void missingMetadata(){
+        new ContentItemImpl(ciUri,blob,null){};
+    }
+    
+	@Test
+	public void addingAndRetrieving() {
+		ContentItem ci = new ContentItemImpl(ciUri,blob,new SimpleMGraph()){};
+		UriRef partUri = new UriRef("http://foo/");
+		Date someObject = new Date();
+		ci.addPart(partUri, someObject);
+		ci.getMetadata().add(new TripleImpl(ciUri, new UriRef("http://example.org/ontology#hasPart"), partUri));
+        ci.getMetadata().add(new TripleImpl(partUri, new UriRef("http://example.org/ontology#isPartOf"),ciUri));
+		Assert.assertEquals(someObject, ci.getPart(partUri, Date.class));
+		Assert.assertEquals(someObject, ci.getPart(1, Date.class));
+		Assert.assertEquals(partUri, ci.getPartUri(1));
+		Assert.assertEquals(new UriRef(ciUri.getUnicodeString()+"_main"), ci.getPartUri(0));
+		try {
+		    ci.getPart(2, Object.class);
+		    Assert.assertTrue("Requesting non existance part MUST throw an NoSuchPartException", false);
+		} catch (NoSuchPartException e) {/* expected*/}
+        try {
+            ci.getPart(new UriRef("http://foo/nonexisting"), Object.class);
+            Assert.assertTrue("Requesting non existance part MUST throw an NoSuchPartException", false);
+        } catch (NoSuchPartException e) {/* expected*/}
+        try {
+            ci.getPartUri(2);
+            Assert.assertTrue("Requesting non existance part MUST throw an NoSuchPartException", false);
+        } catch (NoSuchPartException e) {/* expected*/}
+		//finally log the toString
+		log.info("toString: {}",ci);
+	}
+	@Test(expected=IllegalArgumentException.class)
+	public void addPartWithoutUri(){
+	    ContentItem ci = new ContentItemImpl(ciUri,blob,new SimpleMGraph()){};
+	    ci.addPart(null, new Date());
+	}
+    @Test(expected=IllegalArgumentException.class)
+    public void addPartWithoutPartContent(){
+        ContentItem ci = new ContentItemImpl(ciUri,blob,new SimpleMGraph()){};
+        ci.addPart(new UriRef("http://foo/"), null);
+    }
+    /**
+     * The ContentItem MUST NOT allow replacing the main content part (the
+     * Blob stored at index 0).
+     */
+    @Test(expected=IllegalArgumentException.class)
+    public void replaceMainPart(){
+        ContentItem ci = new ContentItemImpl(ciUri,blob,new SimpleMGraph()){};
+        UriRef mainPart = ci.getPartUri(0);
+        ci.addPart(mainPart, new Date());
+    }
+}

Propchange: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/ContentItemTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/EnhancementEngineHelperTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/EnhancementEngineHelperTest.java?rev=1221148&r1=1221147&r2=1221148&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/EnhancementEngineHelperTest.java (original)
+++ incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/EnhancementEngineHelperTest.java Tue Dec 20 08:52:07 2011
@@ -32,6 +32,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.InMemoryContentItem;
 import org.junit.Test;
 
 
@@ -51,29 +52,14 @@ public class EnhancementEngineHelperTest
 
     @Test
     public void testEnhancementEngineHelper() throws Exception {
-        ContentItem ci = new ContentItem() {
-            MGraph mgraph = new SimpleMGraph();
-            @Override
-            public InputStream getStream() {
-                return new ByteArrayInputStream("There is content".getBytes());
-            }
-
-            @Override
-            public String getMimeType() { return "text/plain"; }
-
-            @Override
-            public MGraph getMetadata() { return mgraph; }
-
-            @Override
-            public String getId() { return "urn:test:contentItem"; }
-        };
+        ContentItem ci = new InMemoryContentItem(new UriRef("urn:test:contentItem"), "There is content", "text/plain");
         EnhancementEngine engine = new MyEngine();
 
         UriRef extraction = EnhancementEngineHelper.createNewExtraction(ci, engine);
         MGraph metadata = ci.getMetadata();
 
         assertTrue(metadata.contains(new TripleImpl(extraction,
-                ENHANCER_RELATED_CONTENT_ITEM, new UriRef(ci.getId()))));
+                ENHANCER_RELATED_CONTENT_ITEM, new UriRef(ci.getUri().getUnicodeString()))));
         assertTrue(metadata.contains(new TripleImpl(extraction,
                 RDF_TYPE, ENHANCER_EXTRACTION)));
         // and so on

Added: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/InMemoryBlobTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/InMemoryBlobTest.java?rev=1221148&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/InMemoryBlobTest.java (added)
+++ incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/InMemoryBlobTest.java Tue Dec 20 08:52:07 2011
@@ -0,0 +1,68 @@
+package org.apache.stanbol.enhancer.serviceapi.helper;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.helper.InMemoryBlob;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class InMemoryBlobTest extends AbstractBlobTest {
+    
+    private static final Charset UTF8 = Charset.forName("UTF-8");
+    
+    /*
+     * Override to test InMemoryBlob instead of AbstractBlob
+     * @see org.apache.stanbol.enhancer.serviceapi.helper.AbstractBlobTest#getBlobToTestMimetypeHandling(java.lang.String)
+     */
+    @Override
+    protected Blob getBlobToTestMimetypeHandling(String mimeType) {
+        return new InMemoryBlob("dummy".getBytes(UTF8), mimeType);
+    }
+    /**
+     * Tests correct handling of strings and the DEFAULT mimeType for strings
+     * "text/plain"
+     * @throws IOException
+     */
+    @Test
+    public void testString() throws IOException{
+        String test = "Exámplê";
+        Blob blob = new InMemoryBlob(test, null);
+        Assert.assertEquals("text/plain", blob.getMimeType());
+        Assert.assertTrue(blob.getParameter().containsKey("charset"));
+        Assert.assertEquals(UTF8.name(), blob.getParameter().get("charset"));
+        
+        String value = new String(IOUtils.toByteArray(blob.getStream()),UTF8);
+        Assert.assertEquals(test, value);
+    }
+    /**
+     * Tests that any charset passed with the mime type is replaced by UTF-8,
+     * the encoding actually used to convert the String into bytes.
+     * @throws IOException
+     */
+    @Test
+    public void testStringWithCharset() throws IOException{
+        String test = "Exámplê";
+        Blob blob = new InMemoryBlob(test, "text/plain;charset=ISO-8859-4");
+        Assert.assertEquals("text/plain", blob.getMimeType());
+        Assert.assertTrue(blob.getParameter().containsKey("charset"));
+        Assert.assertEquals(UTF8.name(), blob.getParameter().get("charset"));
+    }
+    /**
+     * Tests the default mimeType "application/octet-stream" for binary data.
+     * @throws IOException
+     */
+    @Test
+    public void testDefaultBinaryMimeType() throws IOException {
+        Blob blob = new InMemoryBlob("dummy".getBytes(UTF8), null);
+        Assert.assertEquals("application/octet-stream", blob.getMimeType());
+        Assert.assertTrue(blob.getParameter().isEmpty());
+
+        blob = new InMemoryBlob(new ByteArrayInputStream("dummy".getBytes(UTF8)), null);
+        Assert.assertEquals("application/octet-stream", blob.getMimeType());
+        Assert.assertTrue(blob.getParameter().isEmpty());
+    }
+}

Propchange: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/InMemoryBlobTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/TestEnhancementInterfaces.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/TestEnhancementInterfaces.java?rev=1221148&r1=1221147&r2=1221148&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/TestEnhancementInterfaces.java (original)
+++ incubator/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/TestEnhancementInterfaces.java Tue Dec 20 08:52:07 2011
@@ -38,6 +38,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.EntityAnnotation;
 import org.apache.stanbol.enhancer.servicesapi.TextAnnotation;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.InMemoryContentItem;
 import org.apache.stanbol.enhancer.servicesapi.helper.RdfEntityFactory;
 import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
 import org.junit.Test;
@@ -55,20 +56,14 @@ public class TestEnhancementInterfaces {
     public static final UriRef TEST_ENHANCEMENT_ENGINE_URI = new UriRef("urn:test:dummyEnhancementEngine");
 
     public static ContentItem wrapAsContentItem(final String id, final String text) {
-        return new ContentItem() {
-            SimpleMGraph metadata = new SimpleMGraph();
-            public InputStream getStream() { return new ByteArrayInputStream(text.getBytes());}
-            public String getMimeType() { return "text/plain"; }
-            public MGraph getMetadata() { return metadata; }
-            public String getId() { return id; }
-        };
+    	return new InMemoryContentItem(id, text, "text/plain");
     }
 
     @Test
     public void testEnhancementInterfaces() throws Exception {
         ContentItem ci = wrapAsContentItem("urn:contentItem-"
                 + EnhancementEngineHelper.randomUUID(),SINGLE_SENTENCE);
-        UriRef ciUri = new UriRef(ci.getId());
+        UriRef ciUri = new UriRef(ci.getUri().getUnicodeString());
         RdfEntityFactory factory = RdfEntityFactory.createInstance(ci.getMetadata());
         long start = System.currentTimeMillis();
         //create an Text Annotation representing an extracted Person

Modified: incubator/stanbol/trunk/enhancer/generic/standalone/src/main/java/org/apache/stanbol/enhancer/standalone/impl/InMemoryStore.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/standalone/src/main/java/org/apache/stanbol/enhancer/standalone/impl/InMemoryStore.java?rev=1221148&r1=1221147&r2=1221148&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/standalone/src/main/java/org/apache/stanbol/enhancer/standalone/impl/InMemoryStore.java (original)
+++ incubator/stanbol/trunk/enhancer/generic/standalone/src/main/java/org/apache/stanbol/enhancer/standalone/impl/InMemoryStore.java Tue Dec 20 08:52:07 2011
@@ -79,11 +79,11 @@ public class InMemoryStore implements St
     public String put(ContentItem ci) {
         synchronized (data) {
 
-            data.put(ci.getId(), ci);
+            data.put(ci.getUri().getUnicodeString(), ci);
 
             // remove any previously stored data about ci
             MGraph g = getEnhancementGraph();
-            UriRef uri = new UriRef(ci.getId());
+            UriRef uri = ci.getUri();
             Iterator<Triple> toRemove = g.filter(uri, null, null);
             while (toRemove.hasNext()) {
                 toRemove.next();
@@ -99,7 +99,7 @@ public class InMemoryStore implements St
             // accumulate all triples recently collected
             getEnhancementGraph().addAll(ci.getMetadata());
         }
-        return ci.getId();
+        return ci.getUri().getUnicodeString();
     }
 
     @Override

Modified: incubator/stanbol/trunk/enhancer/generic/standalone/src/main/java/org/apache/stanbol/enhancer/standalone/impl/MainServlet.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/standalone/src/main/java/org/apache/stanbol/enhancer/standalone/impl/MainServlet.java?rev=1221148&r1=1221147&r2=1221148&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/standalone/src/main/java/org/apache/stanbol/enhancer/standalone/impl/MainServlet.java (original)
+++ incubator/stanbol/trunk/enhancer/generic/standalone/src/main/java/org/apache/stanbol/enhancer/standalone/impl/MainServlet.java Tue Dec 20 08:52:07 2011
@@ -99,7 +99,7 @@ public class MainServlet extends HttpSer
         log.info("Created {}, registered with EnhancementJobManager", ci);
         resp.setContentType("text/plain");
         resp.setCharacterEncoding("UTF-8");
-        resp.getWriter().write(ci.getId());
+        resp.getWriter().write(ci.getUri().getUnicodeString());
         resp.getWriter().write('\n');
     }
 
@@ -127,7 +127,7 @@ public class MainServlet extends HttpSer
 
     private static void dumpContentItem(ContentItem ci, PrintWriter w) {
         w.print("**ContentItem:");
-        w.println(ci.getId());
+        w.println(ci.getUri().getUnicodeString());
         w.println("**Metadata:");
         for (Triple o : ci.getMetadata().getGraph()) {
             w.println(o);

Modified: incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java?rev=1221148&r1=1221147&r2=1221148&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java (original)
+++ incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java Tue Dec 20 08:52:07 2011
@@ -148,7 +148,7 @@ public class ContentItemResource extends
                 this.imageSrc = rawURI;
             }
             else {
-              Iterator<Triple> it = ci.getMetadata().filter(new UriRef(ci.getId()), NIE_PLAINTEXTCONTENT, null);
+              Iterator<Triple> it = ci.getMetadata().filter(ci.getUri(), NIE_PLAINTEXTCONTENT, null);
               if (it.hasNext()) {
                 this.textContent = ((Literal)it.next().getObject()).getLexicalForm();
               }

Modified: incubator/stanbol/trunk/enhancer/jobmanager/weightedjobmanager/src/main/java/org/apache/stanbol/enhancer/jobmanager/impl/WeightedJobManager.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/jobmanager/weightedjobmanager/src/main/java/org/apache/stanbol/enhancer/jobmanager/impl/WeightedJobManager.java?rev=1221148&r1=1221147&r2=1221148&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/jobmanager/weightedjobmanager/src/main/java/org/apache/stanbol/enhancer/jobmanager/impl/WeightedJobManager.java (original)
+++ incubator/stanbol/trunk/enhancer/jobmanager/weightedjobmanager/src/main/java/org/apache/stanbol/enhancer/jobmanager/impl/WeightedJobManager.java Tue Dec 20 08:52:07 2011
@@ -74,15 +74,15 @@ public class WeightedJobManager implemen
             long startEngine = System.currentTimeMillis();
             if (engine.canEnhance(ci) == EnhancementEngine.CANNOT_ENHANCE) {
                 log.debug("[{}] cannot be enhanced by engine [{}], skipping",
-                        ci.getId(), engine);
+                        ci.getUri().getUnicodeString(), engine);
             } else {
                 // TODO should handle sync/async enhancing. All sync for now.
                 engine.computeEnhancements(ci);
                 log.debug("ContentItem [{}] enhanced by engine [{}] in {}ms",
-                        new Object[]{ci.getId(), engine,System.currentTimeMillis()-startEngine});
+                        new Object[]{ci.getUri().getUnicodeString(), engine,System.currentTimeMillis()-startEngine});
             }
         }
-        log.debug("ContentItem [{}] enhanced in {}ms",ci.getId(),(System.currentTimeMillis()-start));
+        log.debug("ContentItem [{}] enhanced in {}ms",ci.getUri().getUnicodeString(),(System.currentTimeMillis()-start));
     }
     
     @Override