You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/04/08 17:12:46 UTC

svn commit: r1311019 [2/5] - in /incubator/stanbol/trunk: ./ contenthub/search/featured/src/main/java/org/apache/stanbol/contenthub/search/featured/ contenthub/search/related/src/main/java/org/apache/stanbol/contenthub/search/related/ contenthub/store/...

Modified: incubator/stanbol/trunk/enhancer/engines/metaxa/src/main/java/org/apache/stanbol/enhancer/engines/metaxa/MetaxaEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/metaxa/src/main/java/org/apache/stanbol/enhancer/engines/metaxa/MetaxaEngine.java?rev=1311019&r1=1311018&r2=1311019&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/metaxa/src/main/java/org/apache/stanbol/enhancer/engines/metaxa/MetaxaEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/metaxa/src/main/java/org/apache/stanbol/enhancer/engines/metaxa/MetaxaEngine.java Sun Apr  8 15:12:40 2012
@@ -19,7 +19,6 @@ package org.apache.stanbol.enhancer.engi
 import static org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.randomUUID;
 
 import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.nio.charset.Charset;
@@ -41,8 +40,10 @@ import org.apache.clerezza.rdf.core.impl
 import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.clerezza.rdf.core.impl.TypedLiteralImpl;
+import org.apache.commons.io.IOUtils;
 import org.apache.felix.scr.annotations.Component;
 import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Reference;
 import org.apache.felix.scr.annotations.Service;
 import org.apache.stanbol.enhancer.engines.metaxa.core.MetaxaCore;
 import org.apache.stanbol.enhancer.engines.metaxa.core.RDF2GoUtils;
@@ -50,13 +51,13 @@ import org.apache.stanbol.enhancer.engin
 import org.apache.stanbol.enhancer.engines.metaxa.core.html.HtmlExtractorFactory;
 import org.apache.stanbol.enhancer.servicesapi.Blob;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
+import org.apache.stanbol.enhancer.servicesapi.ContentSink;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
-import org.apache.stanbol.enhancer.servicesapi.helper.InMemoryBlob;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
-import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
 import org.ontoware.aifbcommons.collection.ClosableIterator;
 import org.ontoware.rdf2go.model.Model;
 import org.ontoware.rdf2go.model.Statement;
@@ -90,7 +91,10 @@ public class MetaxaEngine 
         implements EnhancementEngine, ServiceProperties {
 
     private static final Logger log = LoggerFactory.getLogger(MetaxaEngine.class);
-
+    /**
+     * The default charset
+     */
+    private static final Charset UTF8 = Charset.forName("UTF-8");
     /**
      * Plain text content of a content item.
       */
@@ -122,6 +126,14 @@ public class MetaxaEngine 
      */
     @Property(boolValue=false)
     public static final String INCLUDE_TEXT_IN_METADATA = "org.apache.stanbol.enhancer.engines.metaxa.includeText";
+    
+    /**
+     * Internally used to create additional {@link Blob} for transformed
+     * versions of the original content
+     */
+    @Reference
+    private ContentItemFactory ciFactory;
+    
     private MetaxaCore extractor;
     
     BundleContext bundleContext;
@@ -131,7 +143,7 @@ public class MetaxaEngine 
     
     private Set<String> ignoredMimeTypes;
     private boolean includeText = false;
-
+    
     /**
      * The activate method.
      *
@@ -202,93 +214,98 @@ public class MetaxaEngine 
     }
 
     public void computeEnhancements(ContentItem ci) throws EngineException {
+        // get model from the extraction
+        URIImpl docId;
+        Model m = null;
+        ci.getLock().readLock().lock();
         try {
-            // get model from the extraction
-            URIImpl docId;
-            Model m;
-            ci.getLock().readLock().lock();
-            try {
-                docId = new URIImpl(ci.getUri().getUnicodeString());
-                m = this.extractor.extract(ci.getStream(), docId, ci.getMimeType());
-            } finally {
-                ci.getLock().readLock().unlock();
-            }
-            // add the statements from this model to the Metadata model
-            if (null != m) {
-                /*
-               String text = MetaxaCore.getText(m);
-               log.info(text);
-                */
-                // get the model where to add the statements
-                /*
-                 * NOTE(rweten): 
-                 *  There is no need to create an TextEnhancement to mark that
-                 *  a ContentItem was processed by Metaxa, because the
-                 *  ExecutionMetadata do record this anyway.
-                 */
-                //     
-                // create enhancement
-                //UriRef textEnhancement = EnhancementEngineHelper.createTextEnhancement(ci, this);
-                // set confidence value to 1.0
-                //g.add(new TripleImpl(textEnhancement, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(1.0)));
-                RDF2GoUtils.urifyBlankNodes(m);
-                HashMap<BlankNode, BNode> blankNodeMap = new HashMap<BlankNode, BNode>();
-                ClosableIterator<Statement> it = m.iterator();
-                ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
-                Charset charset = Charset.forName("UTF-8");
-                BufferedWriter out = new BufferedWriter(new OutputStreamWriter(byteOut, charset));
-                MGraph g = new SimpleMGraph(); //first add to a temporary graph
-                while (it.hasNext()) {
-                    Statement oneStmt = it.next();
-                    //we need to treat triples that provide the plain/text
-                    //version differently. Such Objects need to be added to
-                    //the plain text Blob!
-                    if(oneStmt.getSubject().equals(docId) && 
-                            oneStmt.getPredicate().equals(NIE_PLAINTEXT_PROPERTY)){
-                        out.write(oneStmt.getObject().toString());
-                        if (includeText) {
-                          NonLiteral subject = (NonLiteral) asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
-                          UriRef predicate = (UriRef) asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
-                          Resource object = asClerezzaResource(oneStmt.getObject(), blankNodeMap);
-                          g.add(new TripleImpl(subject, predicate, object));
+            docId = new URIImpl(ci.getUri().getUnicodeString());
+            m = this.extractor.extract(ci.getStream(), docId, ci.getMimeType());
+        } catch (ExtractorException e) {
+            throw new EngineException("Error while processing ContentItem "
+                + ci.getUri()+" with Metaxa",e);
+        } catch (IOException e) {
+            throw new EngineException("Error while processing ContentItem "
+                    + ci.getUri()+" with Metaxa",e);
+        } finally {
+            ci.getLock().readLock().unlock();
+        }
+        // Convert the RDF2go model to a Clerezza Graph and also extract
+        // the extracted plain text from the model
+        if (null == m) {
+            log.debug("Unable to preocess ContentItem {} (mime type {}) with Metaxa",
+                ci.getUri(),ci.getMimeType());
+            return;
+        }
+        ContentSink plainTextSink;
+        try {
+            plainTextSink = ciFactory.createContentSink("text/plain");
+        } catch (IOException e) {
+            m.close();
+            throw new EngineException("Unable to initialise Blob for storing" +
+            		"the plain text content",e);
+        }
+        HashMap<BlankNode, BNode> blankNodeMap = new HashMap<BlankNode, BNode>();
+        RDF2GoUtils.urifyBlankNodes(m);
+        ClosableIterator<Statement> it = m.iterator();
+        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(
+            plainTextSink.getOutputStream(), UTF8));
+        boolean textExtracted = false; //used to detect if some text was extracted
+        try {
+            MGraph g = new SimpleMGraph(); //first add to a temporary graph
+            while (it.hasNext()) {
+                Statement oneStmt = it.next();
+                //we need to treat triples that provide the plain/text
+                //version differently. Such Objects need to be added to
+                //the plain text Blob!
+                if(oneStmt.getSubject().equals(docId) && 
+                        oneStmt.getPredicate().equals(NIE_PLAINTEXT_PROPERTY)){
+                    String text = oneStmt.getObject().toString();
+                    if(text != null && !text.isEmpty()){
+                        try {
+                            out.write(oneStmt.getObject().toString());
+                        } catch (IOException e) {
+                            throw new EngineException("Unable to write extracted" +
+                            		"plain text to Blob (blob impl: "
+                                    + plainTextSink.getBlob().getClass()+")",e);
                         }
-                    } else { //add metadata to the metadata of the contentItem
-                        NonLiteral subject = (NonLiteral) asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
-                        UriRef predicate = (UriRef) asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
-                        Resource object = asClerezzaResource(oneStmt.getObject(), blankNodeMap);
-    
-                        if (null != subject && null != predicate && null != object) {
-                            Triple t = new TripleImpl(subject, predicate, object);
-                            g.add(t);
-                            log.debug("added " + t.toString());
+                        textExtracted = true;
+                        if (includeText) {
+                            NonLiteral subject = (NonLiteral) asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
+                            UriRef predicate = (UriRef) asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
+                            Resource object = asClerezzaResource(oneStmt.getObject(), blankNodeMap);
+                            g.add(new TripleImpl(subject, predicate, object));
                         }
                     }
+                } else { //add metadata to the metadata of the contentItem
+                    NonLiteral subject = (NonLiteral) asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
+                    UriRef predicate = (UriRef) asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
+                    Resource object = asClerezzaResource(oneStmt.getObject(), blankNodeMap);
+
+                    if (null != subject && null != predicate && null != object) {
+                        Triple t = new TripleImpl(subject, predicate, object);
+                        g.add(t);
+                        log.debug("added " + t.toString());
+                    }
                 }
-                ci.getLock().writeLock().lock();
-                try { 
-                    //now acquire a write lock and add the extracted 
-                    //metadata to the content item
-                    ci.getMetadata().addAll(g);
-                } finally {
-                    ci.getLock().writeLock().unlock();
-                }
-                out.close();
-                byte[] plainTextData = byteOut.toByteArray();
-                if(plainTextData.length > 0){
-                    //add plain text to the content item
-                    UriRef blobUri = new UriRef("urn:metaxa:plain-text:"+randomUUID());
-                    Blob plainTextBlob = new InMemoryBlob(plainTextData, 
-                        "text/plain;charset="+charset.toString());
-                    ci.addPart(blobUri, plainTextBlob);
-                    //TODO: add contentPart metadata to the contentItem
-                }
-                it.close();
-                m.close();
             }
-        } catch (ExtractorException e) {
-            throw new EngineException(e.getLocalizedMessage(), e);
-        } catch (IOException e) {
-            throw new EngineException(e.getLocalizedMessage(), e);
+            //add the extracted triples to the metadata of the ContentItem
+            ci.getLock().writeLock().lock();
+            try { 
+                ci.getMetadata().addAll(g);
+                g = null;
+            } finally {
+                ci.getLock().writeLock().unlock();
+            }
+        } finally {
+            it.close();
+            m.close();
+            IOUtils.closeQuietly(out);
+        }
+        if(textExtracted){
+            //add plain text to the content item
+            UriRef blobUri = new UriRef("urn:metaxa:plain-text:"+randomUUID());
+            ci.addPart(blobUri, plainTextSink.getBlob());
         }
     }
 

Modified: incubator/stanbol/trunk/enhancer/engines/opencalais/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opencalais/pom.xml?rev=1311019&r1=1311018&r2=1311019&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/opencalais/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/opencalais/pom.xml Sun Apr  8 15:12:40 2012
@@ -96,7 +96,6 @@
     <dependency>
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.commons.stanboltools.offline</artifactId>
-      <scope>provided</scope>
     </dependency>
 
     <dependency>
@@ -110,28 +109,32 @@
     <dependency>
       <groupId>org.apache.clerezza</groupId>
       <artifactId>rdf.jena.parser</artifactId>
-      <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.clerezza</groupId>
       <artifactId>rdf.jena.sparql</artifactId>
-      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
     </dependency>
 
     <dependency>
       <groupId>org.slf4j</groupId>
       <artifactId>slf4j-api</artifactId>
     </dependency>
+  
+    <!-- Test dependencies -->
     <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-simple</artifactId>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.core</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
-      <groupId>commons-io</groupId>
-      <artifactId>commons-io</artifactId>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-simple</artifactId>
+      <scope>test</scope>
     </dependency>
-
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>

Modified: incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java?rev=1311019&r1=1311018&r2=1311019&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java Sun Apr  8 15:12:40 2012
@@ -82,9 +82,9 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
 import org.osgi.framework.BundleContext;
 import org.osgi.service.cm.ConfigurationException;
 import org.osgi.service.component.ComponentContext;

Modified: incubator/stanbol/trunk/enhancer/engines/opencalais/src/test/java/org/apache/stanbol/enhancer/engines/opencalais/impl/TestOpenCalaisEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opencalais/src/test/java/org/apache/stanbol/enhancer/engines/opencalais/impl/TestOpenCalaisEngine.java?rev=1311019&r1=1311018&r2=1311019&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/opencalais/src/test/java/org/apache/stanbol/enhancer/engines/opencalais/impl/TestOpenCalaisEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/opencalais/src/test/java/org/apache/stanbol/enhancer/engines/opencalais/impl/TestOpenCalaisEngine.java Sun Apr  8 15:12:40 2012
@@ -16,6 +16,7 @@
  */
 package org.apache.stanbol.enhancer.engines.opencalais.impl;
 
+import java.io.IOException;
 import java.io.InputStream;
 import java.util.Collection;
 import java.util.HashMap;
@@ -25,9 +26,11 @@ import org.apache.clerezza.rdf.core.MGra
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.core.access.TcManager;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
-import org.apache.stanbol.enhancer.servicesapi.helper.InMemoryContentItem;
+import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
 import org.junit.Assert;
 import org.junit.Assume;
@@ -52,6 +55,7 @@ public class TestOpenCalaisEngine {
 
   private static OpenCalaisEngine calaisExtractor;
 
+  private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();
   private static String TEST_LICENSE_KEY = System.getProperty(OpenCalaisEngine.LICENSE_KEY);
   private static String TEST_TEXT = "Israeli PM Netanyahu pulls out of US nuclear summit\nIsraeli PM Benjamin Netanyahu has cancelled a visit to the US where he was to attend a summit on nuclear security, Israeli officials say. Mr Netanyahu made the decision after learning that Egypt and Turkey intended to raise the issue of Israel's presumed nuclear arsenal, the officials said. Mr Obama is due to host dozens of world leaders at the two-day conference, which begins in Washington on Monday. Israel has never confirmed or denied that it possesses atomic weapons. Israel's Intelligence and Atomic Energy Minister Dan Meridor will take Netanyahu's place in the nuclear summit, Israeli radio said. More than 40 countries are expected at the meeting, which will focus on preventing the spread of nuclear weapons to militant groups.";
   
@@ -65,8 +69,8 @@ public class TestOpenCalaisEngine {
     }
   }
 
-  public static ContentItem wrapAsContentItem(final String text) {
-	  return new InMemoryContentItem((UriRef)null, text, "text/plain");
+  public static ContentItem wrapAsContentItem(final String text) throws IOException {
+	  return ciFactory.createContentItem(new StringSource(text));
   }
   
   @Test
@@ -86,7 +90,7 @@ public class TestOpenCalaisEngine {
   }
 
   @Test
-  public void testCalaisConnection() {
+  public void testCalaisConnection() throws IOException {
   	Assume.assumeNotNull(calaisExtractor.getLicenseKey());
   	try {
   	  ContentItem ci = wrapAsContentItem(TEST_TEXT);

Modified: incubator/stanbol/trunk/enhancer/engines/opennlp-ner/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/pom.xml?rev=1311019&r1=1311018&r2=1311019&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/opennlp-ner/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/opennlp-ner/pom.xml Sun Apr  8 15:12:40 2012
@@ -93,6 +93,11 @@
 
     <!-- for tests -->
     <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.core</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <scope>test</scope>

Modified: incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java?rev=1311019&r1=1311018&r2=1311019&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java Sun Apr  8 15:12:40 2012
@@ -34,7 +34,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
 import org.osgi.framework.Constants;
 import org.osgi.framework.ServiceRegistration;
 import org.osgi.service.cm.ConfigurationException;

Modified: incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java?rev=1311019&r1=1311018&r2=1311019&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java Sun Apr  8 15:12:40 2012
@@ -37,9 +37,12 @@ import org.apache.clerezza.rdf.core.Reso
 import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.TypedLiteral;
 import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItem;
+import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
-import org.apache.stanbol.enhancer.servicesapi.helper.InMemoryContentItem;
+import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
 import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -59,6 +62,7 @@ public class TestNamedEntityExtractionEn
             + " without any name.\n"
             + "A new paragraph is being written. This paragraph has two sentences.";
 
+    private static ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();
     static NEREngineCore nerEngine;
     
     public static final String FAKE_BUNDLE_SYMBOLIC_NAME = "FAKE_BUNDLE_SYMBOLIC_NAME";
@@ -71,8 +75,8 @@ public class TestNamedEntityExtractionEn
     }
 
     public static ContentItem wrapAsContentItem(final String id,
-            final String text) {
-    	return new InMemoryContentItem(id, text, "text/plain");
+            final String text) throws IOException {
+    	return ciFactory.createContentItem(new UriRef(id),new StringSource(text));
     }
 
     @Test
@@ -127,7 +131,7 @@ public class TestNamedEntityExtractionEn
 
     @Test
     public void testComputeEnhancements()
-            throws EngineException {
+            throws EngineException, IOException {
         ContentItem ci = wrapAsContentItem("my doc id", SINGLE_SENTENCE);
         nerEngine.computeEnhancements(ci);
         MGraph g = ci.getMetadata();

Modified: incubator/stanbol/trunk/enhancer/engines/refactor/src/main/java/org/apache/stanbol/enhancer/engines/refactor/RefactorEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/refactor/src/main/java/org/apache/stanbol/enhancer/engines/refactor/RefactorEnhancementEngine.java?rev=1311019&r1=1311018&r2=1311019&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/refactor/src/main/java/org/apache/stanbol/enhancer/engines/refactor/RefactorEnhancementEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/refactor/src/main/java/org/apache/stanbol/enhancer/engines/refactor/RefactorEnhancementEngine.java Sun Apr  8 15:12:40 2012
@@ -52,7 +52,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
 import org.apache.stanbol.entityhub.core.utils.OsgiUtils;
 import org.apache.stanbol.entityhub.model.clerezza.RdfRepresentation;
 import org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory;

Modified: incubator/stanbol/trunk/enhancer/engines/tika/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/pom.xml?rev=1311019&r1=1311018&r2=1311019&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/tika/pom.xml Sun Apr  8 15:12:40 2012
@@ -136,6 +136,17 @@
       <artifactId>slf4j-api</artifactId>
     </dependency>
 
+    <!-- Test dependencies -->
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.core</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.core</artifactId>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>

Modified: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java?rev=1311019&r1=1311018&r2=1311019&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java Sun Apr  8 15:12:40 2012
@@ -29,29 +29,33 @@ import static org.apache.tika.mime.Media
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.StringWriter;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.Dictionary;
 import java.util.Map;
 
-import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.commons.io.IOUtils;
 import org.apache.felix.scr.annotations.Component;
 import org.apache.felix.scr.annotations.Properties;
 import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Reference;
 import org.apache.felix.scr.annotations.Service;
 import org.apache.stanbol.enhancer.engines.tika.handler.MultiHandler;
 import org.apache.stanbol.enhancer.engines.tika.handler.PlainTextHandler;
 import org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings;
 import org.apache.stanbol.enhancer.servicesapi.Blob;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
+import org.apache.stanbol.enhancer.servicesapi.ContentSink;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
-import org.apache.stanbol.enhancer.servicesapi.helper.InMemoryBlob;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.metadata.Metadata;
@@ -92,9 +96,7 @@ public class TikaEngine 
         extends AbstractEnhancementEngine<RuntimeException,RuntimeException> 
         implements EnhancementEngine, ServiceProperties {
     private final Logger log = LoggerFactory.getLogger(TikaEngine.class);
-    
-    private final LiteralFactory lf = LiteralFactory.getInstance();
-    
+        
     public static final String SKIP_LINEBREAKS_WITHIN_CONTENT = "stanbol.engines.tika.skipLinebreaks";
     //Metadata -> Ontology mapping configuration
     public static final String MAPPING_MEDIA_RESOURCE = "stanbol.engine.tika.mapping.mediaResource";
@@ -121,18 +123,37 @@ public class TikaEngine 
      */
     public static final Integer defaultOrder = ORDERING_PRE_PROCESSING;
 
-    protected static MediaType XHTML = new MediaType("application", "xhtml+xml");
+    protected static final MediaType XHTML = new MediaType("application", "xhtml+xml");
+    protected static final Charset UTF8 = Charset.forName("UTF-8");
     
     private TikaConfig config;
     private Parser parser;
     private Detector detector;
     private OntologyMappings ontologyMappings;
+    /**
+     * The {@link ContentItemFactory} is used to create {@link Blob}s for the
+     * plain text and XHTML version of the processed ContentItem
+     */
+    @Reference
+    private ContentItemFactory ciFactory;
     
     private static class MediaTypeAndStream {
         MediaType mediaType;
         InputStream in;
     }
-   
+    /**
+     * Default constructor used by OSGI
+     */
+    public TikaEngine() {}
+    /**
+     * Used by the unit tests to init the {@link ContentItemFactory} outside
+     * an OSGI environment.
+     * @param cifactory the {@link ContentItemFactory} used to create the {@link Blob}s
+     */
+    TikaEngine(ContentItemFactory cifactory) {
+        this.ciFactory = cifactory;
+    }
+
     @Override
     public int canEnhance(ContentItem ci) throws EngineException {
         return ENHANCE_ASYNC;
@@ -160,42 +181,59 @@ public class TikaEngine 
             Metadata metadata = new Metadata();
             //set the already parsed contentType
             metadata.set(Metadata.CONTENT_TYPE, mtas.mediaType.toString());
-            final StringWriter writer = new StringWriter();
+            ContentSink plainTextSink;
+            try {
+                plainTextSink = ciFactory.createContentSink(TEXT_PLAIN +"; charset="+UTF8.name());
+            } catch (IOException e) {
+                IOUtils.closeQuietly(in); //close the input stream
+                throw new EngineException("Error while initialising Blob for" +
+                		"writing the text/plain version of the parsed content",e);
+            }
+            final Writer plainTextWriter = new OutputStreamWriter(plainTextSink.getOutputStream(), UTF8);
             final ContentHandler textHandler = new BodyContentHandler( //only the Body
-                new PlainTextHandler(writer, false,skipLinebreaks)); //skip ignoreable
+                new PlainTextHandler(plainTextWriter, false,skipLinebreaks)); //skip ignoreable
             final ToXMLContentHandler xhtmlHandler;
             final ContentHandler mainHandler;
-            if(!plainMediaType.equals(XHTML)){ //do not parse XHTML from XHTML
-                xhtmlHandler = new ToXMLContentHandler();
-                mainHandler = new MultiHandler(textHandler,xhtmlHandler);
-            } else {
-                mainHandler = textHandler;
-                xhtmlHandler = null;
-            }
+            ContentSink xhtmlSink = null;
             try {
-                parser.parse(in, mainHandler, metadata, context);
-            } catch (Exception e) {
-                throw new EngineException("Unable to convert ContentItem "+
-                        ci.getUri()+" with mimeType '"+ci.getMimeType()+"' to "+
-                        "plain text!",e);
-            }
-            IOUtils.closeQuietly(in);
-            if(log.isDebugEnabled()){
-                log.debug("Plain Content: \n{}",writer.toString());
+                if(!plainMediaType.equals(XHTML)){ //do not parse XHTML from XHTML
+                    try {
+                        xhtmlSink = ciFactory.createContentSink(XHTML +"; charset="+UTF8.name());
+                    } catch (IOException e) {
+                        throw new EngineException("Error while initialising Blob for" +
+                                "writing the application/xhtml+xml version of the parsed content",e);
+                    }
+                    try {
+                        xhtmlHandler = new ToXMLContentHandler(xhtmlSink.getOutputStream(),UTF8.name());
+                    } catch (UnsupportedEncodingException e) {
+                        throw new EngineException("This system does not support the encoding "+UTF8,e);
+                    }
+                    mainHandler = new MultiHandler(textHandler,xhtmlHandler);
+                } else {
+                    mainHandler = textHandler;
+                    xhtmlHandler = null;
+                    xhtmlSink = null;
+                }
+                try {
+                    parser.parse(in, mainHandler, metadata, context);
+                } catch (Exception e) {
+                    throw new EngineException("Unable to convert ContentItem "+
+                            ci.getUri()+" with mimeType '"+ci.getMimeType()+"' to "+
+                            "plain text!",e);
+                }
+            } finally { //ensure that the writers are closed correctly
+                IOUtils.closeQuietly(in);
+                IOUtils.closeQuietly(plainTextWriter);
+                if(xhtmlSink != null){
+                    IOUtils.closeQuietly(xhtmlSink.getOutputStream());
+                }
             }
             String random = randomUUID().toString();
             UriRef textBlobUri = new UriRef("urn:tika:text:"+random);
-            ci.addPart(textBlobUri, 
-                new InMemoryBlob(writer.toString(), 
-                    TEXT_PLAIN.toString())); //string -> no encoding
+            ci.addPart(textBlobUri, plainTextSink.getBlob());
             if(xhtmlHandler != null){
-                if(log.isDebugEnabled()){
-                    log.debug("XML Content: \n{}",xhtmlHandler.toString());
-                }
                 UriRef xhtmlBlobUri = new UriRef("urn:tika:xhtml:"+random);
-                ci.addPart(xhtmlBlobUri, 
-                    new InMemoryBlob(xhtmlHandler.toString(),
-                        "application/xhtml+xml")); //string -> no encoding
+                ci.addPart(xhtmlBlobUri,  xhtmlSink.getBlob());
             }
             //add the extracted metadata
             if(log.isDebugEnabled()){
@@ -209,7 +247,6 @@ public class TikaEngine 
             }finally{
                 ci.getLock().writeLock().unlock();
             }
-            
         } //else not supported format
 
     }

Modified: incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java?rev=1311019&r1=1311018&r2=1311019&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java Sun Apr  8 15:12:40 2012
@@ -18,10 +18,8 @@ package org.apache.stanbol.enhancer.engi
 
 import static java.util.Collections.singleton;
 import static org.apache.commons.io.IOUtils.closeQuietly;
-import static org.apache.commons.io.IOUtils.toByteArray;
 import static org.apache.stanbol.enhancer.engines.tika.TikaEngine.XHTML;
 import static org.apache.stanbol.enhancer.servicesapi.EnhancementEngine.CANNOT_ENHANCE;
-import static org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.randomUUID;
 import static org.apache.tika.mime.MediaType.OCTET_STREAM;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
@@ -59,12 +57,14 @@ import org.apache.clerezza.rdf.ontologie
 import org.apache.clerezza.rdf.ontologies.XSD;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.io.LineIterator;
+import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
 import org.apache.stanbol.enhancer.servicesapi.Blob;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
-import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
-import org.apache.stanbol.enhancer.servicesapi.helper.InMemoryContentItem;
+import org.apache.stanbol.enhancer.servicesapi.impl.StreamSource;
+import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
 import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
 import org.junit.After;
 import org.junit.AfterClass;
@@ -79,6 +79,7 @@ import org.slf4j.LoggerFactory;
 public class TikaEngineTest {
 
     private static final Logger log = LoggerFactory.getLogger(TikaEngineTest.class);
+    private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();
     private static TikaEngine engine;
     private static MockComponentContext context;
     private static LiteralFactory lf = LiteralFactory.getInstance();
@@ -97,7 +98,7 @@ public class TikaEngineTest {
     @Before
     public void bindServices() throws ConfigurationException {
         if(engine == null){
-            engine = new TikaEngine();
+            engine = new TikaEngine(ciFactory);
             engine.activate(context);
         }
     }
@@ -395,7 +396,7 @@ public class TikaEngineTest {
     
 
     
-    public void testMetadata() throws EngineException, ParseException {
+    public void testMetadata() throws EngineException, ParseException, IOException{
         log.info(">>> testMetadata <<<");
         ContentItem ci = createContentItem("testMP3id3v24.mp3", "audio/mpeg");
         assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
@@ -411,7 +412,7 @@ public class TikaEngineTest {
         verifyValue(ci, new UriRef(NamespaceEnum.media+"hasCreator"),null,"Test Artist");
     }
     @Test
-    public void testExifMetadata() throws EngineException, ParseException {
+    public void testExifMetadata() throws EngineException, ParseException, IOException {
         log.info(">>> testExifMetadata <<<");
         String exif = "http://www.semanticdesktop.org/ontologies/2007/05/10/nexif#";
         ContentItem ci = createContentItem("testJPEG_EXIF.jpg", "image/jpeg");
@@ -478,12 +479,11 @@ public class TikaEngineTest {
      * Tests that text is not processed
      */
     @Test
-    public void testText() throws EngineException {
+    public void testText() throws EngineException, IOException {
         log.info(">>> testText <<<");
-        byte[] data = ("The Stanbol enhancer can " +
-                "detect famous cities such as Paris and people such as Bob " +
-                "Marley.").getBytes(Charset.forName("UTF-8"));
-        ContentItem ci = new InMemoryContentItem(data,"text/plain; charset=UTF-8");
+        String text = "The Stanbol enhancer can detect famous cities such as " +
+        		"Paris and people such as Bob Marley.";
+        ContentItem ci = ciFactory.createContentItem(new StringSource(text));
         Assert.assertEquals(1, ContentItemHelper.getContentParts(ci, Blob.class).size());
     }
     @Test
@@ -518,18 +518,10 @@ public class TikaEngineTest {
         assertEquals(2, ContentItemHelper.getContentParts(ci, Blob.class).size());
     }
     
-    private ContentItem createContentItem(String resourceName, String contentType){
+    private ContentItem createContentItem(String resourceName, String contentType) throws IOException {
         InputStream in = TikaEngineTest.class.getClassLoader().getResourceAsStream(resourceName);
         assertNotNull(in);
-        byte[] data;
-        try {
-            data = toByteArray(in);
-        } catch (IOException e) {
-            throw new IllegalStateException("Unable to read test data!",e);
-        }
-        closeQuietly(in);
-        UriRef ref = new UriRef("urn:contentItem:content-"+randomUUID());
-        return new InMemoryContentItem(data,contentType);
+        return ciFactory.createContentItem(new StreamSource(in,contentType));
     }
     /**
      * Tests if the parsed regex pattern are contained in any line of the parsed

Modified: incubator/stanbol/trunk/enhancer/engines/zemanta/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/zemanta/pom.xml?rev=1311019&r1=1311018&r2=1311019&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/zemanta/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/zemanta/pom.xml Sun Apr  8 15:12:40 2012
@@ -84,6 +84,11 @@
 
     <!-- for tests -->
     <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.core</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <version>4.7</version>

Modified: incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java?rev=1311019&r1=1311018&r2=1311019&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java Sun Apr  8 15:12:40 2012
@@ -68,9 +68,9 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
 import org.osgi.framework.BundleContext;
 import org.osgi.service.cm.ConfigurationException;
 import org.osgi.service.component.ComponentContext;

Modified: incubator/stanbol/trunk/enhancer/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java?rev=1311019&r1=1311018&r2=1311019&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java Sun Apr  8 15:12:40 2012
@@ -45,11 +45,13 @@ import org.apache.clerezza.rdf.core.Trip
 import org.apache.clerezza.rdf.core.TypedLiteral;
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.jena.serializer.JenaSerializerProvider;
+import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
-import org.apache.stanbol.enhancer.servicesapi.helper.InMemoryContentItem;
+import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -78,6 +80,7 @@ public class ZemantaEnhancementEngineTes
 
     static ZemantaEnhancementEngine zemantaEngine = new ZemantaEnhancementEngine();
 
+    private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();
     private static final Logger log = LoggerFactory.getLogger(ZemantaEnhancementEngineTest.class);
 
     /**
@@ -101,10 +104,10 @@ public class ZemantaEnhancementEngineTes
         zemantaEngine.deactivate(null);
     }
 
-    public static ContentItem wrapAsContentItem(final String text) {
+    public static ContentItem wrapAsContentItem(final String text) throws IOException {
     	String id = "urn:org.apache.stanbol.enhancer:test:engines.zemanta:content-item-"
             + EnhancementEngineHelper.randomUUID().toString();
-    	return new InMemoryContentItem(id, text, "text/plain");
+    	return ciFactory.createContentItem(new UriRef(id), new StringSource(text));
     }
 
     @Test

Propchange: incubator/stanbol/trunk/enhancer/generic/core/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Sun Apr  8 15:12:40 2012
@@ -0,0 +1,7 @@
+.classpath
+
+.project
+
+.settings
+
+target

Added: incubator/stanbol/trunk/enhancer/generic/core/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/core/pom.xml?rev=1311019&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/core/pom.xml (added)
+++ incubator/stanbol/trunk/enhancer/generic/core/pom.xml Sun Apr  8 15:12:40 2012
@@ -0,0 +1,106 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+	license agreements. See the NOTICE file distributed with this work for additional 
+	information regarding copyright ownership. The ASF licenses this file to 
+	You under the Apache License, Version 2.0 (the "License"); you may not use 
+	this file except in compliance with the License. You may obtain a copy of 
+	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+	by applicable law or agreed to in writing, software distributed under the 
+	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+	OF ANY KIND, either express or implied. See the License for the specific 
+	language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+	<modelVersion>4.0.0</modelVersion>
+
+	<parent>
+		<groupId>org.apache.stanbol</groupId>
+		<artifactId>org.apache.stanbol.enhancer.parent</artifactId>
+		<version>0.9.0-incubating-SNAPSHOT</version>
+		<relativePath>../../parent</relativePath>
+	</parent>
+
+	<groupId>org.apache.stanbol</groupId>
+	<artifactId>org.apache.stanbol.enhancer.core</artifactId>
+	<packaging>bundle</packaging>
+
+	<name>Apache Stanbol Enhancer Core</name>
+	<description>The Enhancer Core providing central services and implementations</description>
+
+	<inceptionYear>2012</inceptionYear>
+
+	<scm>
+		<connection>
+            scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/generic/core/
+        </connection>
+		<developerConnection>
+            scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/generic/core/
+        </developerConnection>
+		<url>http://incubator.apache.org/stanbol/</url>
+	</scm>
+
+	<build>
+		<plugins>
+			<plugin>
+				<groupId>org.apache.felix</groupId>
+				<artifactId>maven-bundle-plugin</artifactId>
+				<extensions>true</extensions>
+				<configuration>
+					<instructions>
+						<Export-Package>
+							org.apache.stanbol.enhancer.contentitem.inmemory;version=${project.version},
+							org.apache.stanbol.enhancer.contentitem.file;version=${project.version}
+						</Export-Package>
+						<Private-Package>
+							org.apache.stanbol.enhancer.chainmanager.impl;version=${project.version},
+							org.apache.stanbol.enhancer.enginemanager.impl;version=${project.version}
+						</Private-Package>
+					</instructions>
+				</configuration>
+			</plugin>
+			<plugin>
+				<groupId>org.apache.felix</groupId>
+				<artifactId>maven-scr-plugin</artifactId>
+			</plugin>
+		</plugins>
+	</build>
+
+	<dependencies>
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.commons.indexedgraph</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.clerezza</groupId>
+			<artifactId>rdf.core</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>commons-io</groupId>
+			<artifactId>commons-io</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.felix</groupId>
+			<artifactId>org.apache.felix.scr.annotations</artifactId>
+		</dependency>
+
+		<!-- for tests -->
+		<dependency>
+			<groupId>junit</groupId>
+			<artifactId>junit</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.slf4j</groupId>
+			<artifactId>slf4j-simple</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.enhancer.test</artifactId>
+		</dependency>
+	</dependencies>
+
+</project>

Propchange: incubator/stanbol/trunk/enhancer/generic/core/pom.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/enhancer/generic/core/src/main/java/org/apache/stanbol/enhancer/chainmanager/impl/ChainManagerImpl.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/core/src/main/java/org/apache/stanbol/enhancer/chainmanager/impl/ChainManagerImpl.java?rev=1311019&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/core/src/main/java/org/apache/stanbol/enhancer/chainmanager/impl/ChainManagerImpl.java (added)
+++ incubator/stanbol/trunk/enhancer/generic/core/src/main/java/org/apache/stanbol/enhancer/chainmanager/impl/ChainManagerImpl.java Sun Apr  8 15:12:40 2012
@@ -0,0 +1,51 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.chainmanager.impl;
+
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.enhancer.servicesapi.ChainManager;
+import org.apache.stanbol.enhancer.servicesapi.impl.ChainsTracker;
+import org.osgi.service.component.ComponentContext;
+
+/**
+ * Implementation of the ChainManager interface as OSGI component based
+ * on {@link ChainsTracker}.
+ * 
+ * @author Rupert Westenthaler
+ *
+ */
+@Component(immediate=true,enabled=true)
+@Service(value=ChainManager.class)
+public class ChainManagerImpl extends ChainsTracker implements ChainManager {
+
+    /**
+     * Default constructor. The inherited tracker is only configured once the
+     * component gets activated (see {@link #activate(ComponentContext)}).
+     */
+    public ChainManagerImpl(){
+        // the no-arg constructor of ChainsTracker is invoked implicitly
+    }
+
+    /**
+     * Configures the inherited chain tracker with the bundle context of the
+     * activated component and opens it.
+     * @param context the context of the activated component
+     */
+    @Activate
+    public void activate(ComponentContext context){
+        initChainTracker(context.getBundleContext(), null, null);
+        open();
+    }
+
+    /**
+     * Closes the inherited chain tracker.
+     * @param context the context of the deactivated component (not used)
+     */
+    @Deactivate
+    public void deactivate(ComponentContext context){
+        close();
+    }
+}

Added: incubator/stanbol/trunk/enhancer/generic/core/src/main/java/org/apache/stanbol/enhancer/contentitem/file/FileContentItemFactory.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/core/src/main/java/org/apache/stanbol/enhancer/contentitem/file/FileContentItemFactory.java?rev=1311019&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/core/src/main/java/org/apache/stanbol/enhancer/contentitem/file/FileContentItemFactory.java (added)
+++ incubator/stanbol/trunk/enhancer/generic/core/src/main/java/org/apache/stanbol/enhancer/contentitem/file/FileContentItemFactory.java Sun Apr  8 15:12:40 2012
@@ -0,0 +1,407 @@
+package org.apache.stanbol.enhancer.contentitem.file;
+
+import static org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper.DEFAULT_CONTENT_ITEM_PREFIX;
+import static org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper.SHA1;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.commons.io.IOUtils;
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.commons.indexedgraph.IndexedMGraph;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
+import org.apache.stanbol.enhancer.servicesapi.ContentSink;
+import org.apache.stanbol.enhancer.servicesapi.ContentSource;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractContentItemFactory;
+import org.apache.stanbol.enhancer.servicesapi.impl.ContentItemImpl;
+import org.osgi.framework.Constants;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * ContentItemFactory that stores the parsed content in Files. This Factory
+ * should be preferred to the InMemoryContentItemFactory in cases where content
+ * is parsed to the Enhancer that can not be kept in Memory.
+ * @author Rupert Westenthaler
+ *
+ */
+@Component(inherit=true)
+@Service(value=ContentItemFactory.class)
+@Properties(value={
+    @Property(name=FileContentItemFactory.PARAM_BASE_DIR,value=""),
+    @Property(name=Constants.SERVICE_RANKING, intValue=50)
+})
+public class FileContentItemFactory extends AbstractContentItemFactory implements ContentItemFactory {
+
+    
+    private final Logger log = LoggerFactory.getLogger(FileContentItemFactory.class);
+    
+    public static final String DEFAULT_BINARY_MIMETYPE = "application/octet-stream";
+
+    public static final String PARAM_BASE_DIR = "stanbol.enhancer.contentitem.file.baseDir";
+    
+    private static FileContentItemFactory instance;
+    
+    /**
+     * Base directory used to create temp files
+     */
+    private File baseDir;
+    
+    /**
+     * Returns the lazily created singleton instance of this factory. The
+     * instance is created with the no-arg constructor, so no base directory
+     * is configured for it.<p>
+     * Within an OSGI environment this should not be used, because this
+     * factory is also registered as an OSGI service.<p>
+     * NOTE: the lazy initialisation is not synchronised.
+     * @return the singleton instance
+     */
+    public static FileContentItemFactory getInstance(){
+        if(instance != null){
+            return instance;
+        }
+        instance = new FileContentItemFactory();
+        return instance;
+    }
+    
+    
+    /**
+     * Creates a factory without a configured base directory.
+     */
+    public FileContentItemFactory() {
+        // NOTE(review): the original comment on this call reads "dereference
+        // all data on construction" - confirm the meaning of the flag against
+        // AbstractContentItemFactory
+        super(false);
+    }
+
+    /**
+     * Creates a factory that creates its temporary files in the parsed
+     * directory.
+     * @param baseDir the base directory or <code>null</code> to create the
+     * factory without a configured base directory
+     * @throws IOException if the parsed directory does not exist and can not
+     * be created
+     */
+    public FileContentItemFactory(File baseDir) throws IOException {
+        this();
+        if(baseDir == null){
+            return; //nothing to initialise
+        }
+        this.baseDir = baseDir;
+        initBaseDir();
+    }
+    
+    /**
+     * Activates this component. If a non-empty value is configured for
+     * {@link #PARAM_BASE_DIR} it is used as base directory for temporary
+     * files. The value is resolved relative to the "sling.home" property of
+     * the bundle context if that property is present.
+     * @param ctx the context of the activated component
+     * @throws ConfigurationException if the configured base directory can
+     * not be initialised
+     */
+    @Activate
+    protected void activate(ComponentContext ctx) throws ConfigurationException {
+        Object value = ctx.getProperties().get(PARAM_BASE_DIR);
+        if(value != null && !value.toString().isEmpty()){
+            String home = ctx.getBundleContext().getProperty("sling.home");
+            if(home != null){
+                baseDir = new File(home,value.toString());
+            } else {
+                baseDir = new File(value.toString());
+            }
+            // remember the path up front: initBaseDir() may reset the baseDir
+            // field on failure, so it must not be dereferenced in the handler
+            String absolutePath = baseDir.getAbsolutePath();
+            try {
+                initBaseDir();
+            } catch (Exception e) {
+                // the exception needs to be thrown (not only created),
+                // otherwise an invalid configuration is silently ignored
+                throw new ConfigurationException(PARAM_BASE_DIR, "Unable to initialise "
+                    + "configured base Directory '"+value+"' (absolute path: '"
+                    + absolutePath+"')!",e);
+            }
+        }
+    }
+
+
+    /**
+     * Internally used to initialise the {@link #baseDir}. Creates the
+     * directory (including missing parents) if it does not yet exist.
+     * @throws IllegalArgumentException if a file with the configured path
+     * already exists but is not a directory. In this case {@link #baseDir}
+     * is reset to <code>null</code>.
+     * @throws IOException if the configured directory does not exist and
+     * could not be created
+     */
+    private void initBaseDir() throws IOException {
+        if(baseDir.exists() && !baseDir.isDirectory()){
+            // read the path BEFORE resetting the field; dereferencing baseDir
+            // afterwards would fail with a NullPointerException
+            String path = baseDir.getAbsolutePath();
+            baseDir = null;
+            throw new IllegalArgumentException("A File with the configured Directory '"
+                + path + "' already exists, but is not a Directory!");
+        }
+        log.info("activate {} with temp directory {}",getClass().getSimpleName(),
+            baseDir.getAbsolutePath());
+        if(!baseDir.isDirectory()){
+            if(!baseDir.mkdirs()){
+                throw new IOException("Unable to create "
+                    + "temp-directory '"+baseDir.getAbsolutePath()+"'!");
+            }
+        }
+    }
+    
+    /**
+     * Deactivates this component by resetting the configured base directory.
+     * @param context the context of the deactivated component (not used)
+     */
+    @Deactivate
+    protected void deactivate(ComponentContext context){
+        this.baseDir = null;
+    }
+        
+    @Override
+    protected ContentItem createContentItem(UriRef id, Blob blob, MGraph metadata) {
+        return new FileContentItem(id, blob, metadata);
+    }
+
+    @Override
+    protected ContentItem createContentItem(String prefix, Blob blob, MGraph metadata) {
+        return new FileContentItem(prefix, blob, metadata);
+    }
+
+    /**
+     * Creates a {@link FileBlob} for the parsed content source.
+     * @throws IOException as declared by the {@link FileBlob} constructor
+     */
+    @Override
+    public Blob createBlob(ContentSource source) throws IOException {
+        final Blob fileBlob = new FileBlob(source);
+        return fileBlob;
+    }
+    /**
+     * Creates a {@link FileContentSink} for the parsed media type.
+     * @param mediaType the media type of the content written to the sink
+     */
+    @Override
+    public ContentSink createContentSink(String mediaType) throws IOException {
+        final ContentSink sink = new FileContentSink(mediaType);
+        return sink;
+    }
+    
+    
+    /**
+     * Creates a temporary file with the parsed prefix. The file is created
+     * in the {@link #baseDir}. If that fails and a base directory is
+     * configured, a second attempt is made without a directory (the system
+     * temp directory). The created file is registered for deletion on JVM
+     * exit.
+     * @param prefix the prefix of the file name
+     * @return the created temporary file
+     * @throws IllegalStateException if the temporary file could not be
+     * created
+     */
+    protected File createTempFile(String prefix){
+        File tmpFile;
+        try {
+            tmpFile = File.createTempFile(prefix, null, baseDir);
+        } catch (IOException e) {
+            if(baseDir == null){
+                throw new IllegalStateException("Unable to create temp-file",e);
+            }
+            // log the cause of the failure before falling back, otherwise the
+            // reason why the configured directory is unusable gets lost
+            log.warn("Unable to create temp-file in directory "+baseDir
+                + " (trying to create it in the system temp directory)",e);
+            try {
+                tmpFile = File.createTempFile(prefix, null, null);
+            } catch (IOException e1) {
+                throw new IllegalStateException("Unable to create temp-file "
+                    + "in '"+baseDir+"' and system temp directory",e1);
+            }
+        }
+        tmpFile.deleteOnExit();
+        return tmpFile;
+    }
+
+    /**
+     * {@link ContentSink} implementation backed by a
+     * {@link WriteableFileBlob}: the stream returned by
+     * {@link #getOutputStream()} is the output stream of the blob returned
+     * by {@link #getBlob()}.
+     */
+    public class FileContentSink implements ContentSink {
+
+        /** The blob receiving the content written to this sink */
+        private final WriteableFileBlob blob;
+
+        /**
+         * @param mediaType the media type parsed to the created
+         * {@link WriteableFileBlob}
+         */
+        protected FileContentSink(String mediaType){
+            this.blob = new WriteableFileBlob(mediaType);
+        }
+
+        @Override
+        public Blob getBlob() {
+            return blob;
+        }
+
+        @Override
+        public OutputStream getOutputStream() {
+            return blob.getOutputStream();
+        }
+
+    }
+    
+    /**
+     * {@link Blob} implementation whose content is written to a temporary
+     * file via the {@link OutputStream} opened on construction. The mime type
+     * and its parameters are kept in memory.
+     */
+    public class WriteableFileBlob implements Blob {
+        
+        /** The temporary file holding the content */
+        private final File file;
+        /** The stream used to write the content to the {@link #file} */
+        private final OutputStream out;
+        private String mimeType;
+        private Map<String,String> parameters;
+
+        /**
+         * Creates the temporary file, opens the output stream on it and
+         * parses the media type.
+         * @param mediaType the media type (including optional parameters) or
+         * <code>null</code> to use {@link #DEFAULT_BINARY_MIMETYPE}
+         * @throws IllegalStateException if the created temporary file can not
+         * be opened for writing
+         */
+        protected WriteableFileBlob(String mediaType){
+            this.file = createTempFile("blob");
+            try {
+                this.out = new FileOutputStream(file);
+            } catch (FileNotFoundException e) {
+                throw new IllegalStateException("temporary file '"
+                        + file.getAbsolutePath()+"' was not created as expected!",e);
+            }
+            final Map<String,String> parsed;
+            if(mediaType != null){
+                parsed = ContentItemHelper.parseMimeType(mediaType);
+                // the entry stored under the null key is used as mime type;
+                // all remaining entries are exposed as parameters
+                this.mimeType = parsed.remove(null);
+            } else {
+                parsed = new HashMap<String,String>();
+                this.mimeType = DEFAULT_BINARY_MIMETYPE;
+            }
+            this.parameters = Collections.unmodifiableMap(parsed);
+        }
+
+        /**
+         * Used by the {@link FileContentSink} implementation
+         * @return the stream writing to the temporary file of this blob
+         */
+        protected final OutputStream getOutputStream(){
+            return out;
+        }
+        
+        @Override
+        public String getMimeType() {
+            return mimeType;
+        }
+
+        /**
+         * Opens a new stream reading from the temporary file.
+         * @throws IllegalStateException if the temporary file is no longer
+         * present
+         */
+        @Override
+        public InputStream getStream() {
+            final InputStream stream;
+            try {
+                stream = new FileInputStream(file);
+            } catch (FileNotFoundException e) {
+                throw new IllegalStateException("temporary file '"
+                        + file.getAbsolutePath()+"' no longer present!",e);
+            }
+            return stream;
+        }
+
+        /**
+         * @return the unmodifiable parameters parsed from the media type
+         */
+        @Override
+        public Map<String,String> getParameter() {
+            return parameters;
+        }
+
+        /**
+         * @return the current length of the temporary file
+         */
+        @Override
+        public long getContentLength() {
+            return file.length();
+        }
+
+        /**
+         * Closes the output stream and deletes the temporary file.
+         */
+        @Override
+        protected void finalize() throws Throwable {
+            IOUtils.closeQuietly(out);
+            file.delete();
+            super.finalize();
+        }
+    }
+    
+    /**
+     * Blob implementation that store the data in a temp file. NOTE that
+     * all the other information such as {@link #getMimeType()},
+     * {@link #getParameter()} are kept in memory. So this can NOT be used
+     * to persist a ContentItem!
+     * @author Rupert Westenthaler
+     *
+     */
+    public class FileBlob implements Blob {
+
+        private final File file;
+        /**
+         * This implementation generates the sha1 while copying the data
+         * in the constructor to the file to avoid reading the data twice if a
+         * {@link ContentItem} is created based on a Blob.
+         */
+        private final String sha1;
+
+        private final String mimeType;
+
+        private final Map<String,String> parameters;
+        
+        protected FileBlob(ContentSource source) throws IOException {
+            if(source == null){
+                throw new IllegalArgumentException("The parsed ConentSource MUST NOT be NULL!");
+            }
+            file = createTempFile("blob");
+            OutputStream out = null;
+            InputStream in = null;
+            try {
+                out = new FileOutputStream(file);
+                in = source.getStream();
+                sha1 = ContentItemHelper.streamDigest(in, out, SHA1);
+            } finally {
+                IOUtils.closeQuietly(in);
+                IOUtils.closeQuietly(out);
+            }
+            Map<String,String> parameters;
+            if(source.getMediaType() == null){
+                this.mimeType = DEFAULT_BINARY_MIMETYPE;
+                parameters = new HashMap<String,String>();
+            } else {
+                parameters = ContentItemHelper.parseMimeType(source.getMediaType());
+                this.mimeType = parameters.remove(null);
+            }
+            this.parameters = Collections.unmodifiableMap(parameters);
+        }
+        /**
+         * The tmp file representing this Blob
+         * @return the file
+         */
+        protected final File getFile() {
+            return file;
+        }
+
+        /**
+         * The sha1 of this Blob - typically used to generate the default IDs
+         * of a ContentItem
+         * @return the sha1
+         */
+        protected final String getSha1() {
+            return sha1;
+        }
+        
+        @Override
+        public String getMimeType() {
+            return mimeType;
+        }
+
+        @Override
+        public InputStream getStream() {
+            try {
+                return new FileInputStream(file);
+            } catch (FileNotFoundException e) {
+                throw new IllegalStateException("temporary file '"
+                        + file.getAbsolutePath()+"' no longer present!",e);
+            }
+        }
+
+        @Override
+        public Map<String,String> getParameter() {
+            return parameters;
+        }
+
+        @Override
+        public long getContentLength() {
+            return file.length();
+        }
+        @Override
+        protected void finalize() throws Throwable {
+            //delete the file
+            file.delete();
+        }
+    }
+    /**
+     * Utility that returns the ID for a FileContentItem based on
+     * {@link FileBlob#getSha1()}. The ID is built as
+     * <code>prefix + "sha1-" + sha1</code> (the digest name in lower case).<p>
+     * This method is part of the {@link FileContentItemFactory} because it
+     * is used in the super(..) call of the {@link FileContentItem}. Normally
+     * it would be a static method of the inner class (which has a similar
+     * scope as a non static method in the outer class).
+     * @param blob the blob
+     * @param prefix the prefix used for the created id
+     * @return the id
+     * @throws IllegalArgumentException if the parsed {@link Blob} or the
+     * prefix is <code>null</code>
+     * @throws IllegalStateException if the parsed blob is not an {@link FileBlob}
+     */
+    protected UriRef getDefaultUri(Blob blob, String prefix) {
+        if(blob == null){
+            throw new IllegalArgumentException("The parsed Blob MUST NOT be NULL!");
+        }
+        if(prefix == null){
+            throw new IllegalArgumentException("The parsed prefix MUST NOT be NULL!");
+        }
+        if(!(blob instanceof FileBlob)) {
+            throw new IllegalStateException("FileContentItem expects FileBlobs to be used " +
+                    "as Blob implementation (found: "+blob.getClass()+")!");
+        }
+        return new UriRef(prefix+SHA1.toLowerCase()+ '-' + ((FileBlob)blob).getSha1());
+    }
+
+    /**
+     * ContentItem implementation used together with {@link FileBlob}s. If no
+     * id is parsed the id is created by calling
+     * <code>getDefaultUri(Blob, String)</code> of the enclosing class. If no
+     * metadata graph is parsed a new {@link IndexedMGraph} is used.
+     */
+    protected class FileContentItem extends ContentItemImpl implements ContentItem {
+
+        /**
+         * @param id the id or <code>null</code> to create a default id by
+         * using the default content item prefix
+         * @param blob the blob
+         * @param metadata the metadata or <code>null</code> to create a new graph
+         */
+        public FileContentItem(UriRef id, Blob blob,MGraph metadata) {
+            super(id != null ? id : getDefaultUri(blob, DEFAULT_CONTENT_ITEM_PREFIX),
+                    blob,
+                    metadata != null ? metadata : new IndexedMGraph());
+        }
+
+        /**
+         * @param prefix the prefix used to create the default id
+         * @param blob the blob
+         * @param metadata the metadata or <code>null</code> to create a new graph
+         */
+        public FileContentItem(String prefix, Blob blob,MGraph metadata) {
+            super(getDefaultUri(blob, prefix),
+                    blob,
+                    metadata != null ? metadata : new IndexedMGraph());
+        }
+
+    }
+}

Added: incubator/stanbol/trunk/enhancer/generic/core/src/main/java/org/apache/stanbol/enhancer/contentitem/inmemory/InMemoryBlob.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/core/src/main/java/org/apache/stanbol/enhancer/contentitem/inmemory/InMemoryBlob.java?rev=1311019&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/core/src/main/java/org/apache/stanbol/enhancer/contentitem/inmemory/InMemoryBlob.java (added)
+++ incubator/stanbol/trunk/enhancer/generic/core/src/main/java/org/apache/stanbol/enhancer/contentitem/inmemory/InMemoryBlob.java Sun Apr  8 15:12:40 2012
@@ -0,0 +1,190 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.contentitem.inmemory;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PipedInputStream;
+import java.nio.channels.Pipe;
+import java.nio.charset.Charset;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+
+import org.apache.commons.io.IOUtils;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentReference;
+import org.apache.stanbol.enhancer.servicesapi.ContentSink;
+import org.apache.stanbol.enhancer.servicesapi.ContentSource;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource;
+import org.apache.stanbol.enhancer.servicesapi.impl.StreamSource;
+import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
+
+/**
+ * Holds the parsed data in an byte array. <p>
+ * In case a byte[]  is used to construct the parsed data are NOT copied. In
+ * case of an {@link ByteArrayOutputStream} data are retrieved from the stream
+ * on each call to {@link #getStream()} if new data where added to the output
+ * stream in the meantime.<p>
+ * Also NOTE that all public constructors are deprecated. Users are
+ * encouraged to use the {@link InMemoryContentItemFactory} with a fitting
+ * {@link ContentSource} or {@link ContentReference} to create {@link Blob}
+ * instances.<p>
+ * NOTES regarding the deprecated Constructors:<br>
+ * Strings are encoded as UTF-8 and {@link InputStream} are copied by using 
+ * {@link IOUtils#toByteArray(InputStream)}.<p>
+ * The default mime-types (if <code>null</code> is parsed as mimeType) are for
+ * Strings "text/plain" and in all other cases "application/octet-stream".
+ */
+public class InMemoryBlob implements Blob {
+    private static final Charset UTF8 = Charset.forName("utf-8");
+    public static final String DEFAULT_TEXT_MIMETYPE = "text/plain";
+    public static final String DEFAULT_BINARY_MIMETYPE = "application/octet-stream";
+
+    protected final String mimeType;
+    protected final Map<String,String> parameters;
+    
+    private ByteArrayOutputStream bao;
+    private int size = -1;
+    private byte[] data;
+    
+	/**
+	 * Creates an {@link InMemoryBlob} for the parsed String. If a "charset"
+	 * parameter is present for the parsed mimeType it is replaced with "UTF-8"
+	 * used to encode the Sting as byte[].
+	 * @param text the text
+	 * @param mimeType the mimeType. If <code>null</code> "text/plain" is used
+	 * as default
+	 * @deprecated use {@link InMemoryContentItemFactory#createBlob(ContentSource)} 
+	 * with a {@link StringSource} instead
+	 */
+	public InMemoryBlob(String text, String mimeType){
+	    this(text.getBytes(UTF8),mimeType != null ? mimeType : DEFAULT_TEXT_MIMETYPE,
+	            Collections.singletonMap("charset", UTF8.name()));
+	}
+	/**
+	 * Creates an instance for the parsed {@link InputStream}. Data are copied
+	 * to a byte array. The parsed stream is closed after copying the data.
+	 * @param in the {@link InputStream}. MUST NOT be <code>null</code>
+	 * @param mimeType the mime-type. If <code>null</code>  "application/octet-stream"
+	 * is used as default.
+	 * @throws IOException indicates an error while reading from the parsed stream
+     * @deprecated use {@link InMemoryContentItemFactory#createBlob(ContentSource)} with
+     * a {@link StreamSource} instead
+	 */
+	public InMemoryBlob(InputStream in,String mimeType) throws IOException {
+	    this(IOUtils.toByteArray(in),mimeType);
+	    IOUtils.closeQuietly(in);
+	}
+	/**
+	 * Creates an instance for the parsed byte array. The array is NOT copied
+	 * therefore changes within that array will be reflected to components
+	 * reading the data from this Blob.
+	 * @param data the data. MIST NOT be <code>null</code>
+	 * @param mimeType the mime-type. If <code>null</code>  "application/octet-stream"
+     * is used as default.
+     * @deprecated use {@link InMemoryContentItemFactory#createBlob(ContentSource)} 
+     * with a {@link ByteArraySource} instead
+	 */
+	public InMemoryBlob(byte[] data, String mimeType) {
+	    this(data,mimeType,null);
+	}
+	/**
+	 * Constructor that allows to create a byte array backed Blob based on a
+	 * fixed set of parsed data.
+	 * @param data the data (content of the Blob)
+	 * @param mimeType the mimeType (<code>null</code> if not know; supports parameters)
+	 * @param parsedParameters additional parameters (will override parameters parsed
+	 * with the mimeType; <code>null</code> or mepty map if none)
+	 */
+    protected InMemoryBlob(byte[] data, String mimeType,Map<String,String> parsedParameters) {
+        this(mimeType,parsedParameters);
+        if(data == null){
+            throw new IllegalArgumentException("The parsed content MUST NOT be NULL!");
+        }
+        this.data = data;
+	}
+    /**
+     * Allows to create a in-memory {@link Blob} that represents the data as
+     * written to the parsed {@link ByteArrayOutputStream}. NOTE that
+     * {@link #getStream()} will return an {@link InputStream} over the 
+     * {@link ByteArrayOutputStream#toByteArray() available bytes} at the
+     * time of the call. Therefore it will return partial contents if not yet
+     * all data where written to the parsed output stream!<p>
+     * To workaround this one would need to use a pipe with an infinite buffer
+     * that can be read my multiple {@link InputStream}s. However currently this
+     * feature is not required by the {@link ContentSink} interface.
+     * @param bao the {@link ByteArrayOutputStream}
+     * @param mimeType the mimeType (<code>null</code> if not know; supports parameters)
+     * @param parsedParameters additional parameters (will override parameters parsed
+     * with the mimeType; <code>null</code> or mepty map if none)
+     * @throws IllegalArgumentException if the parsed output stream is <code>null</code>
+     */
+    protected InMemoryBlob(ByteArrayOutputStream bao,String mimeType,Map<String,String> parsedParameters){
+        this(mimeType,parsedParameters);
+        if(bao == null){
+            throw new IllegalArgumentException("The parsed ByteArrayOutputStream MUST NOT be NULL!");
+        }
+        this.bao = bao;
+    }
+    /**
+     * Internally used to correctly init the parsed mimeType and parameter
+     * @param mimeType
+     * @param parsedParameters
+     */
+    private InMemoryBlob(String mimeType,Map<String,String> parsedParameters){
+        Map<String,String> parameters;
+        if(mimeType == null){
+            this.mimeType = DEFAULT_BINARY_MIMETYPE;
+            parameters = new HashMap<String,String>();
+        } else {
+            parameters = ContentItemHelper.parseMimeType(mimeType);
+            this.mimeType = parameters.remove(null);
+        }
+        if(parsedParameters != null){
+            parameters.putAll(parsedParameters);
+        }
+        this.parameters = Collections.unmodifiableMap(parameters);
+        
+    }
+	@Override
+	public final InputStream getStream() {
+	    //if a ByteArrayOutputStream is used to stream the data to the blob,
+	    //than check if we need to create a new array for creating the stream.
+	    if(bao != null && bao.size() != size){
+	        data = bao.toByteArray();
+	        size = data.length;
+	    }
+		return new ByteArrayInputStream(data);
+	}
+	@Override
+	public final long getContentLength() {
+	    return bao != null ? bao.size() : data.length;
+	}
+    @Override
+    public final String getMimeType() {
+        return mimeType;
+    }
+    @Override
+    public final Map<String,String> getParameter() {
+        return parameters;
+    }
+}

Added: incubator/stanbol/trunk/enhancer/generic/core/src/main/java/org/apache/stanbol/enhancer/contentitem/inmemory/InMemoryContentItem.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/core/src/main/java/org/apache/stanbol/enhancer/contentitem/inmemory/InMemoryContentItem.java?rev=1311019&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/core/src/main/java/org/apache/stanbol/enhancer/contentitem/inmemory/InMemoryContentItem.java (added)
+++ incubator/stanbol/trunk/enhancer/generic/core/src/main/java/org/apache/stanbol/enhancer/contentitem/inmemory/InMemoryContentItem.java Sun Apr  8 15:12:40 2012
@@ -0,0 +1,136 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.contentitem.inmemory;
+
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.stanbol.commons.indexedgraph.IndexedMGraph;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource;
+import org.apache.stanbol.enhancer.servicesapi.impl.ContentItemImpl;
+import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
+
+
+/**
+ * ContentItem implementation that holds a complete copy of the data in
+ * memory. Internally it uses {@link InMemoryBlob} to store the content and
+ * an {@link IndexedMGraph} for the metadata (if no metadata graph is parsed).
+ * <p>
+ * This implementation can be used independently of any store implementation and
+ * is suitable for stateless processing.
+ */
+public class InMemoryContentItem extends ContentItemImpl {
+
+//Do not allow to create a ContentItem without a content
+//    public InMemoryContentItem(String id) {
+//        this(id, null, null, null);
+//    }
+
+    /**
+     * Creates a ContentItem with a default id for the parsed content.
+     * @param content the content
+     * @param mimeType the mime type of the content
+     * @deprecated use {@link InMemoryContentItemFactory#createContentItem(ContentItemFactory.ContentSource)} 
+     * with a {@link ByteArraySource}
+     */
+    public InMemoryContentItem(byte[] content, String mimeType) {
+        this((UriRef)null,new InMemoryBlob(content, mimeType),null);
+    }
+    /**
+     * Creates a ContentItem for the parsed textual content.
+     * @param id the id or <code>null</code> to use a default id
+     * @param content the textual content
+     * @param mimeType the mime type of the content
+     * @deprecated use {@link InMemoryContentItemFactory#createContentItem(UriRef, ContentSource)}
+     * with a {@link StringSource} instead.
+     */
+    public InMemoryContentItem(String id, String content, String mimeType) {
+        this(id, new InMemoryBlob(content, mimeType),null);
+    }
+    /**
+     * Creates a ContentItem for the parsed binary content.
+     * @param id the id or <code>null</code> to use a default id
+     * @param content the content
+     * @param mimetype the mime type of the content
+     * @deprecated use {@link InMemoryContentItemFactory#createContentItem(UriRef, ContentSource)}
+     * with a {@link ByteArraySource} instead.
+     */
+    public InMemoryContentItem(String id, byte[] content, String mimetype) {
+        this(id,new InMemoryBlob(content, mimetype),null);
+    }
+
+    /**
+     * Creates a ContentItem for the parsed binary content and metadata.
+     * @param uriString the id or <code>null</code> to use a default id
+     * @param content the content
+     * @param mimeType the mime type of the content
+     * @param metadata the metadata or <code>null</code> to create a new graph
+     * @deprecated use {@link InMemoryContentItemFactory#createContentItem(UriRef, ContentSource,MGraph)}
+     * with a {@link ByteArraySource} instead.
+     */
+    public InMemoryContentItem(String uriString, byte[] content, String mimeType,
+            MGraph metadata) {
+        this(uriString != null? new UriRef(uriString) : null ,
+                new InMemoryBlob(content, mimeType),
+                metadata);
+    }
+    /**
+     * Creates a ContentItem for the parsed textual content.
+     * @param uriRef the id or <code>null</code> to use a default id
+     * @param content the textual content
+     * @param mimeType the mime type of the content
+     * @deprecated use {@link InMemoryContentItemFactory#createContentItem(UriRef, ContentSource,MGraph)}
+     * with a {@link StringSource} instead.
+     */
+    public InMemoryContentItem(UriRef uriRef, String content, String mimeType) {
+        this(uriRef, new InMemoryBlob(content, mimeType), null);
+    }
+    /**
+     * Creates a ContentItem for the parsed binary content and metadata.
+     * @param uri the id or <code>null</code> to use a default id
+     * @param content the content
+     * @param mimeType the mime type of the content
+     * @param metadata the metadata or <code>null</code> to create a new graph
+     * @deprecated use {@link InMemoryContentItemFactory#createContentItem(UriRef, ContentSource,MGraph)}
+     * with a {@link ByteArraySource} instead.
+     */
+    public InMemoryContentItem(UriRef uri, byte[] content, String mimeType, MGraph metadata) {
+        this(uri, new InMemoryBlob(content, mimeType),metadata);
+    }
+    /**
+     * Creates a ContentItem for the parsed blob.
+     * @param uriString the id or <code>null</code> to use a default id
+     * @param blob the blob holding the content
+     * @param metadata the metadata or <code>null</code> to create a new graph
+     */
+    protected InMemoryContentItem(String uriString, Blob blob, MGraph metadata) {
+        this(uriString != null ? new UriRef(uriString) : null, blob, metadata);
+    }
+    /**
+     * Creates a ContentItem for the parsed blob. All other constructors
+     * delegate to this one.
+     * @param uri the id. If <code>null</code> the id is created by using
+     * {@link ContentItemHelper#makeDefaultUrn(Blob)}
+     * @param blob the blob holding the content
+     * @param metadata the metadata. If <code>null</code> a new
+     * {@link IndexedMGraph} is created
+     */
+    protected InMemoryContentItem(UriRef uri, Blob blob, MGraph metadata) {
+        super(uri == null ? ContentItemHelper.makeDefaultUrn(blob): uri,blob,
+                metadata == null ? new IndexedMGraph() : metadata);
+    }
+
+    /**
+     * Creates a "text/plain" ContentItem with a default id for the parsed
+     * String. The content is encoded as UTF-8 (independent of the default
+     * charset of the platform) as it is done by the other constructors
+     * taking textual content.
+     * @param content the textual content
+     * @return the created ContentItem
+     * @deprecated use {@link InMemoryContentItemFactory#createContentItem(ContentSource)}
+     * with a {@link StringSource} instead.
+     */
+    protected static final InMemoryContentItem fromString(String content) {
+        //NOTE: do not use content.getBytes() as this would encode the
+        //content with the platform default charset
+        return new InMemoryContentItem((UriRef)null,
+            new InMemoryBlob(content, "text/plain"), null);
+    }
+
+}