You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2011/06/26 16:50:34 UTC

svn commit: r1139801 - in /incubator/stanbol/trunk/commons: opennlp/ opennlp/src/main/java/org/apache/stanbol/commons/opennlp/ stanboltools/bundledatafileprovider/ stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltool...

Author: rwesten
Date: Sun Jun 26 14:50:34 2011
New Revision: 1139801

URL: http://svn.apache.org/viewvc?rev=1139801&view=rev
Log:
STANBOL-234 and STANBOL-235: It looks like some of the resources were not included in the last commit. In addition this fixes two issues in the pom file of commons/opennlp

Added:
    incubator/stanbol/trunk/commons/opennlp/src/main/java/org/apache/stanbol/commons/opennlp/OpenNLP.java   (with props)
    incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/Activator.java   (with props)
    incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/BundleDataFileProvider.java   (with props)
    incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/DataBundleInstaller.java   (with props)
Modified:
    incubator/stanbol/trunk/commons/opennlp/   (props changed)
    incubator/stanbol/trunk/commons/opennlp/pom.xml
    incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/   (props changed)

Propchange: incubator/stanbol/trunk/commons/opennlp/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Sun Jun 26 14:50:34 2011
@@ -0,0 +1,7 @@
+.settings
+
+target
+
+.project
+
+.classpath

Modified: incubator/stanbol/trunk/commons/opennlp/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/commons/opennlp/pom.xml?rev=1139801&r1=1139800&r2=1139801&view=diff
==============================================================================
--- incubator/stanbol/trunk/commons/opennlp/pom.xml (original)
+++ incubator/stanbol/trunk/commons/opennlp/pom.xml Sun Jun 26 14:50:34 2011
@@ -67,7 +67,7 @@
               opennlp.model,
               opennlp.perceptron,
               opennlp.tools.*,
-              org.apache.stanbol.commons.opennlp,
+              org.apache.stanbol.commons.opennlp
             </Export-Package>
             <Private-Package>
               org.apache.stanbol.commons.opennlp.impl.*
@@ -75,7 +75,6 @@
             <Embed-Dependency>opennlp-tools,opennlp-maxent</Embed-Dependency>
             <Embed-Transitive>true</Embed-Transitive>
             <Import-Package>
-              org.apache.stanbol.defaultdata.opennlp,
               !net.didion.*,
               *
             </Import-Package>

Added: incubator/stanbol/trunk/commons/opennlp/src/main/java/org/apache/stanbol/commons/opennlp/OpenNLP.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/commons/opennlp/src/main/java/org/apache/stanbol/commons/opennlp/OpenNLP.java?rev=1139801&view=auto
==============================================================================
--- incubator/stanbol/trunk/commons/opennlp/src/main/java/org/apache/stanbol/commons/opennlp/OpenNLP.java (added)
+++ incubator/stanbol/trunk/commons/opennlp/src/main/java/org/apache/stanbol/commons/opennlp/OpenNLP.java Sun Jun 26 14:50:34 2011
@@ -0,0 +1,428 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.commons.opennlp;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.util.Map;
+
+import opennlp.tools.chunker.ChunkerModel;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.tokenize.SimpleTokenizer;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.util.InvalidFormatException;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Reference;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Core of our EnhancementEngine, separated from the OSGi service to make it easier to test this.
+ */
+@Component(immediate=true)
+@Service(value=OpenNLP.class)
+public class OpenNLP {
+    private static final String DOWNLOAD_ROOT = "http://opennlp.sourceforge.net/models-1.5/";
+
+    /**
+     * The logger
+     */
+    private final Logger log = LoggerFactory.getLogger(getClass());
+    
+    @Reference
+    private DataFileProvider dataFileProvider;
+     /**
+     * Map holding the already built models
+     * TODO: change to use a WeakReferenceMap
+     */
+    protected Map<String,Object> models = new HashMap<String,Object>();
+//    /**
+//     * Holds a map of {@link #registerModelLocation(BundleContext, String...) registered}
+//     * model location. The bundle symbolic name is used as key to avoid a hard 
+//     * reference to the parsed {@link BundleContext}.
+//     */
+//    protected Map<String,ModelLocation> modelLocations = new HashMap<String,ModelLocation>();
+
+//    private static class ModelLocation {
+//        BundleContext bundleContext;
+//        String[] paths;
+//        BundleResourceProvider provider;
+//    }
+    
+    /**
+     * Default constructor
+     */
+    public OpenNLP(){ 
+        super(); 
+    }
+    /**
+     * Constructor intended to be used when running outside an OSGI environment
+     * (e.g. when used for UnitTests)
+     * @param dataFileProvider the dataFileProvider used to load Model data.
+     */
+    public OpenNLP(DataFileProvider dataFileProvider){
+        this();
+        this.dataFileProvider = dataFileProvider;
+    }
+    /**
+     * Builds a a model for the tokenizing sentenced in a text with the given
+     * language 
+     * @param language the language
+     * @return the model or <code>null</code> if no model data are found
+     * @throws InvalidFormatException in case the found model data are in the wrong format
+     * @throws IOException on any error while reading the model data
+     */
+    public SentenceModel buildSentenceModel(String language) throws InvalidFormatException, IOException {
+        return initModel(String.format("%s-sent.bin", language),
+            SentenceModel.class);
+    }
+    /**
+     * Build a named entity finder model for the parsed entity type and language
+     * @param type the type of the named entities to find (person, organization)
+     * @param language the language
+     * @return the model or <code>null</code> if no model data are found
+     * @throws InvalidFormatException in case the found model data are in the wrong format
+     * @throws IOException on any error while reading the model data
+     */
+    public TokenNameFinderModel buildNameModel(String type, String language) throws InvalidFormatException, IOException {
+        return initModel(String.format("%s-ner-%s.bin", language, type),
+            TokenNameFinderModel.class);
+    }
+    /**
+     * Builds a tokenizer model for the parsed language
+     * @param language the language
+     * @return the model or <code>null</code> if no model data are found
+     * @throws InvalidFormatException in case the found model data are in the wrong format
+     * @throws IOException on any error while reading the model data
+     */
+    public TokenizerModel buildTokenizerModel(String language) throws InvalidFormatException, IOException {
+        return initModel(String.format("%s-token.bin", language),TokenizerModel.class);
+    }
+    /**
+     * Tries to build a {@link TokenizerModel} for the parsed language. If this
+     * succeeds a {@link TokenizerME} instance is returned. If no model data are
+     * found or the model can not be loaded the {@link SimpleTokenizer} is returned.
+     * @param language the language or <code>null</code> to build a {@link SimpleTokenizer}
+     * @return the {@link Tokenizer} for the parsed language (never <code>null</code>).
+     */
+    public Tokenizer getTokenizer(String language) {
+        Tokenizer tokenizer = null;
+        if(language != null){
+            try {
+                TokenizerModel model = buildTokenizerModel(language);
+                if(model != null){ //null: no model data found -> use the fallback below
+                    tokenizer = new TokenizerME(model);
+                }
+            } catch (InvalidFormatException e) {
+                log.warn("Unable to load Tokenizer Model for "+language+": Will use Simple Tokenizer instead",e);
+            } catch (IOException e) {
+                log.warn("Unable to load Tokenizer Model for "+language+": Will use Simple Tokenizer instead",e);
+            }
+        }
+        if(tokenizer == null){
+            log.debug("Use Simple Tolenizer for language {}",language);
+            tokenizer = SimpleTokenizer.INSTANCE;
+        } else {
+            log.debug("Use ME Tolenizer for language {}",language);
+        }
+        return tokenizer;
+    }
+    /**
+     * Builds a "part-of-speech" model for the parsed language. The perceptron
+     * based model is preferred; the maxent model is used as fallback.
+     * @param language the language
+     * @return the model or <code>null</code> if no model data are found
+     * @throws InvalidFormatException in case the found model data are in the wrong format
+     * @throws IOException on any error while reading the model data
+     */
+    public POSModel builtPartOfSpeachModel(String language) throws IOException, InvalidFormatException {
+        //remember a failure of the preferred (perceptron) variant to report it if the fallback fails too
+        IOException first = null;
+        POSModel model;
+        try {
+            model = initModel(String.format("%s-pos-perceptron.bin",language), POSModel.class);
+        } catch (IOException e) {
+            first = e;
+            log.warn("Unable to laod preceptron based POS model for "+language,e);
+            model = null;
+        }
+        if(model == null){
+            log.info("No perceptron based POS model for language "+language+
+                " available. Will try to load maxent model");
+            try {
+                model = initModel(String.format("%s-pos-maxent.bin",language), POSModel.class);
+            } catch (IOException e) {
+                if(first != null){
+                    throw first;
+                } else {
+                    throw e;
+                }
+            }
+        }
+        return model;
+    }
+    /**
+     * builds a chunker model for the parsed model
+     * @param language the language
+     * @return the model or <code>null</code> if no model data are present
+     * @throws InvalidFormatException in case the found model data are in the wrong format
+     * @throws IOException on any error while reading the model data
+     */
+    public ChunkerModel builtChunkerModel(String language) throws InvalidFormatException, IOException {
+        return initModel(String.format("%s-chunker.bin", language), ChunkerModel.class);
+    }
+    
+//    /**
+//     * Activates the component and re-enables all {@link DataFileProvider}s
+//     * previously {@link #registerModelLocation(BundleContext, String...) registered}.
+//     * @param context the context
+//     */
+//    @Activate
+//    protected void activate(ComponentContext context){
+//        synchronized (modelLocations) {
+//            for(ModelLocation modelLocation : modelLocations.values()){
+//                if(modelLocation.provider == null){
+//                    modelLocation.provider = new BundleResourceProvider(
+//                        modelLocation.bundleContext, 
+//                        modelLocation.paths == null ? null : Arrays.asList(modelLocation.paths));
+//                } // still registered -> should never happen unless activate is called twice
+//            }
+//        }
+//    }
+//    /**
+//     * Deactivates this component. Deactivates all {@link DataFileProvider}s for
+//     * {@link #registerModelLocation(BundleContext, String...) registered}
+//     * locations to search for OpenNLP models and also 
+//     * {@link Map#clear() clears} the {@link #models model cache}.
+//     * @param context the context
+//     */
+//    @Deactivate
+//    protected void deactivate(ComponentContext context){
+//        synchronized (modelLocations) {
+//            for(ModelLocation modelLocation : modelLocations.values()){
+//                if(modelLocation.provider != null){
+//                    modelLocation.provider.close();
+//                    modelLocation.provider = null;
+//                }
+//            }
+//        }
+//        //clear the model cache
+//        models.clear();
+//    }
+//    /**
+//     * Registers the parsed paths as locations to lookup openNLP models.<p>
+//     * This Method is a convenience for manually registering a 
+//     * {@link DataFileProvider} that provides the openNLP model classes such as:
+//     * <pre><code>
+//     *    protected void activate(ComponentContext context){
+//     *        this.modelProvider = new BundleResourceProvider(
+//     *            context.getBundleContext, Arrays.asList("openNLP/models"));
+//     *        ...
+//     *    }
+//     *    
+//     *    protected void deactivate(ComponentContext context){
+//     *        if(this.modelProvider != null){
+//     *            modelProvider.close();
+//     *            modelProvider = null;
+//     *        }
+//     *        ...
+//     *    }
+//     * </code></pre><p>
+//     * Note that multiple calls with the same bundleContext will cause previous 
+//     * registration for the same {@link BundleContext} to be removed.<p>
+//     * {@link DataFileProvider}s created by this will be removed/added as this
+//     * Component is activated/deactivated. However registrations are not 
+//     * persisted and will be gone after an restart of the OSGI environment
+//     * @param bundleContext The context of the bundle used to load openNLP models
+//     * @param searchPaths The paths used to search openNLP models (via the
+//     * bundles classpath). 
+//     */
+//    public void registerModelLocation(BundleContext bundleContext, String...searchPaths){
+//        if(bundleContext == null){
+//            throw new IllegalArgumentException("The parsed BundleContext MUST NOT be NULL!");
+//        }
+//        String bundleSymbolicName = bundleContext.getBundle().getSymbolicName();
+//        synchronized (modelLocations) {
+//            ModelLocation current = modelLocations.get(bundleSymbolicName);
+//            if(current != null){
+//                if(Arrays.equals(searchPaths, current.paths)) {
+//                    log.debug("ModelLocations for Bundle {} and Paths {} already registered");
+//                    return;
+//                } else { //remove current registration
+//                    log.info("remove existing ModelLocations for Bundle {} and Paths {}",
+//                        bundleSymbolicName,current.paths);
+//                    if(current.provider != null){
+//                        current.provider.close();
+//                    }
+//                }
+//            } else {
+//                current = new ModelLocation();
+//                current.bundleContext = bundleContext;
+//            }
+//            current.paths = searchPaths;
+//            current.provider = new BundleResourceProvider(bundleContext, 
+//                searchPaths == null ? null : Arrays.asList(searchPaths));
+//            modelLocations.put(bundleSymbolicName, current);
+//        }
+//        
+//    }
+//    /**
+//     * Removes previously registerd openNLP model locations for the parsed bundle
+//     * context.
+//     * @param bundleContext
+//     */
+//    public void unregisterModelLocation(BundleContext bundleContext){
+//        if(bundleContext == null){
+//            throw new IllegalArgumentException("The parsed BundleContext MUST NOT be NULL!");
+//        }
+//        String bundleSymbolicName = bundleContext.getBundle().getSymbolicName();
+//        synchronized (modelLocations) {
+//            ModelLocation current = modelLocations.remove(bundleSymbolicName);
+//            if(current != null){
+//                log.info("remove modelLocation for Bundle {} and paths {}",
+//                    bundleSymbolicName,current.paths);
+//                if(current.provider != null){
+//                    current.provider.close();
+//                }
+//            }
+//        }
+//    }
+    
+    /**
+     * Uses generics to build models of the parsed type. The {@link #models}
+     * map is used to lookup already created models.
+     * @param <T> the type of the model to create
+     * @param name the name of the file with the model data
+     * @param modelType the class object representing the model to create
+     * @return the model or <code>null</code> if the model data were not found
+     * (the lookup failed with an {@link IOException} or returned no stream)
+     * @throws InvalidFormatException if the model data are in an invalid format
+     * @throws IOException on any error while loading the model data
+     * @throws IllegalStateException on any Exception while creating the model
+     * or if a model of an incompatible type is already cached for the name
+     */
+    @SuppressWarnings("unchecked")
+    private <T> T initModel(String name,Class<T> modelType) throws InvalidFormatException, IOException {
+        Object model = models.get(name);
+        if(model != null) {
+            if(modelType.isAssignableFrom(model.getClass())){
+                return (T) model;
+            } else {
+                throw new IllegalStateException(String.format(
+                    "Incompatible Model Types for name '%s': present=%s | requested=%s",
+                    name,model.getClass(),modelType));
+            }
+        } else { //create new model
+            Map<String,String> modelProperties = new HashMap<String,String>();
+            modelProperties.put("Description", "Statistical model for OpenNLP");
+            modelProperties.put("Model Type:", modelType.getSimpleName());
+            modelProperties.put("Download Location", DOWNLOAD_ROOT+name);
+            InputStream modelDataStream;
+            try {
+                modelDataStream = lookupModelStream(name,modelProperties);
+            } catch (IOException e) {
+                log.info("Unable to load Resource {} via the DataFileProvider",name);
+                return null;
+            }
+            if(modelDataStream == null){
+                //lookupModelStream is documented to return null if the data are not found
+                log.info("Unable to load Resource {} via the DataFileProvider",name);
+                return null;
+            }
+            T built;
+            try {
+                Constructor<T> constructor = modelType.getConstructor(InputStream.class);
+                built = constructor.newInstance(modelDataStream);
+            } catch (SecurityException e) {
+                throw new IllegalStateException(String.format(
+                    "Unable to create %s for %s!",modelType.getSimpleName(),name),e);
+            } catch (NoSuchMethodException e) {
+                throw new IllegalStateException(String.format(
+                    "Unable to create %s for %s!",modelType.getSimpleName(),name),e);
+            } catch (IllegalArgumentException e) {
+                throw new IllegalStateException(String.format(
+                    "Unable to create %s for %s!",modelType.getSimpleName(),name),e);
+            } catch (InstantiationException e) {
+                throw new IllegalStateException(String.format(
+                    "Unable to create %s for %s!",modelType.getSimpleName(),name),e);
+            } catch (IllegalAccessException e) {
+                throw new IllegalStateException(String.format(
+                    "Unable to create %s for %s!",modelType.getSimpleName(),name),e);
+            } catch (InvocationTargetException e) {
+                //this indicates an exception while creating the instance
+                //for InvalidFormatException and IO Exceptions we shall
+                //directly throw the cause. for all others wrap the thrown one
+                //in an IllegalStateException
+                Throwable checked = e.getCause();
+                if (checked instanceof InvalidFormatException){
+                    throw (InvalidFormatException)checked;
+                } else if(checked instanceof IOException){
+                    throw (IOException)checked;
+                } else {
+                    throw new IllegalStateException(String.format(
+                        "Unable to create %s for %s!",modelType.getSimpleName(),name),e);
+                }
+            } finally {
+                IOUtils.closeQuietly(modelDataStream);
+            }
+            models.put(name, built);
+            return built;
+        }
+    }
+    /**
+     * Lookup an openNLP data file via the {@link #dataFileProvider}
+     * @param modelName the name of the model
+     * @return the stream or <code>null</code> if not found
+     * @throws IOException an any error while opening the model file
+     */
+    protected InputStream lookupModelStream(String modelName, Map<String,String> properties) throws IOException {
+        return dataFileProvider.getInputStream(null, modelName,properties);
+    }
+
+    /**
+     * Replaces control characters (all except tab, line feed and carriage return) by spaces to avoid
+     * polluting the annotation graph with snippets that are not serializable as XML.
+     * @return the cleaned text, or <code>null</code> if the parsed text was <code>null</code>
+     */
+    protected static String removeNonUtf8CompliantCharacters(final String text) {
+        if (null == text) {
+            return null;
+        }
+        Charset UTF8 = Charset.forName("UTF-8");
+        byte[] bytes = text.getBytes(UTF8);
+        for (int i = 0; i < bytes.length; i++) {
+            // mask to 0..255: bytes are signed, so bytes >= 0x80 of multi-byte chars would be blanked
+            int ch = bytes[i] & 0xFF;
+            if (!((ch > 31 && ch < 253) || ch == '\t' || ch == '\n' || ch == '\r')) {
+                bytes[i] = ' ';
+            }
+        }
+        return new String(bytes, UTF8);
+    }
+}

Propchange: incubator/stanbol/trunk/commons/opennlp/src/main/java/org/apache/stanbol/commons/opennlp/OpenNLP.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Propchange: incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Sun Jun 26 14:50:34 2011
@@ -0,0 +1,7 @@
+target
+
+.settings
+
+.classpath
+
+.project

Added: incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/Activator.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/Activator.java?rev=1139801&view=auto
==============================================================================
--- incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/Activator.java (added)
+++ incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/Activator.java Sun Jun 26 14:50:34 2011
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.commons.stanboltools.datafileprovider.bundle.impl;
+
+import org.osgi.framework.BundleActivator;
+import org.osgi.framework.BundleContext;
+
+/**
+ * Simple {@link BundleActivator} that creates a {@link DataBundleInstaller} when
+ * the bundle is started and closes it again when the bundle is stopped.
+ * @author Rupert Westenthaler
+ *
+ */
+public class Activator implements BundleActivator {
+    
+    private DataBundleInstaller installer;
+    
+    @Override
+    public void start(BundleContext context) throws Exception {
+        installer = new DataBundleInstaller(context);
+    }
+
+    @Override
+    public void stop(BundleContext context) throws Exception {
+        installer.close();
+        installer = null;
+    }
+
+}

Propchange: incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/Activator.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/BundleDataFileProvider.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/BundleDataFileProvider.java?rev=1139801&view=auto
==============================================================================
--- incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/BundleDataFileProvider.java (added)
+++ incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/BundleDataFileProvider.java Sun Jun 26 14:50:34 2011
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.commons.stanboltools.datafileprovider.bundle.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider;
+import org.osgi.framework.Bundle;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Implementation of the {@link DataFileProvider} interface that uses the
+ * {@link Bundle#getEntry(String)} method to load data files.<p>
+ * Note that this provider searches only the entries within the bundle parsed
+ * to the constructor. The bundle classpath is NOT used!<p>
+ * Requested files are looked up in the configured search paths in the order
+ * the paths were parsed; the first entry found is returned.
+ * @author Rupert Westenthaler
+ */
+public class BundleDataFileProvider implements DataFileProvider {
+
+    private final Logger log = LoggerFactory.getLogger(getClass());
+    /** Separator of bundle entry paths: always "/", independent of the platform */
+    private static final String PATH_SEPARATOR = "/";
+
+    private final Bundle bundle;
+    /**
+     * List with the paths to search. Guaranteed to contain at least a single
+     * Element. All contained paths end with {@link #PATH_SEPARATOR}
+     */
+    private final List<String> searchPaths;
+    
+    /**
+     * Creates a {@link DataFileProvider} that uses the {@link Bundle} to lookup
+     * data files in the directories specified by the parsed relatives path.
+     * @param bundle the bundle used to lookup the data files. MUST NOT be <code>null</code>
+     * @param searchPaths the relative paths to the directories used to search
+     * for requested data files. The parsed paths are searched in the provided
+     * order. Parsed paths are normalised by adding a missing "/" to its end. If
+     * <code>null</code> or an empty list is parsed data files are searched
+     * relative to the root folder of the bundle. Adding an empty String or the
+     * <code>null</code> element allows to search the root folder in addition to
+     * other paths.
+     */
+    public BundleDataFileProvider(Bundle bundle,List<String> searchPaths) {
+        if(bundle == null){
+            throw new IllegalArgumentException("The parsed Bundle MUST NOT be NULL!");
+        }
+        this.bundle = bundle;
+        if(searchPaths == null || searchPaths.isEmpty()){
+            this.searchPaths = Collections.singletonList(PATH_SEPARATOR);
+        } else {
+            List<String> paths = new ArrayList<String>(searchPaths.size());
+            for(String path : searchPaths){
+                if(path == null){ //null element is interpreted as the "" path
+                    path = PATH_SEPARATOR;
+                } else if(!path.endsWith(PATH_SEPARATOR)){ //normalise
+                    path = path+PATH_SEPARATOR;
+                }
+                if(!paths.contains(path)){ //do not add paths more than once
+                    paths.add(path);
+                }
+            }
+            this.searchPaths = Collections.unmodifiableList(paths);
+        }
+    }
+    
+    @Override
+    public InputStream getInputStream(String bundleSymbolicName,
+            String filename, Map<String, String> comments) throws IOException {
+        //If the symbolic name is not null check that is equals to the symbolic
+        //name of the bundle used to create this data file provider
+        if(bundleSymbolicName != null && 
+                !bundle.getSymbolicName().equals(bundleSymbolicName)) {
+            log.debug("Requested bundleSymbolicName {} does not match mine ({}), request ignored",
+                    bundleSymbolicName, bundle.getSymbolicName());
+            return null;
+        }
+        //search the paths in order; stop at the first hit or if all paths are processed (-> null)
+        URL resource = null;
+        Iterator<String> relativePathIterator = searchPaths.iterator();
+        while(resource == null && relativePathIterator.hasNext()){
+            String path = relativePathIterator.next();
+            final String resourceName = path != null ? path + filename : filename ;
+            resource = bundle.getEntry(resourceName);
+            log.info("Resource {} found: {}", (resource == null ? "NOT" : ""), resourceName);
+        }
+        return resource != null ? resource.openStream() : null;
+    }
+    /**
+     * Getter for the search paths
+     * @return the search paths (read only)
+     */
+    public final List<String> getSearchPaths() {
+        return searchPaths;
+    }
+}

Propchange: incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/BundleDataFileProvider.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/DataBundleInstaller.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/DataBundleInstaller.java?rev=1139801&view=auto
==============================================================================
--- incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/DataBundleInstaller.java (added)
+++ incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/DataBundleInstaller.java Sun Jun 26 14:50:34 2011
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.commons.stanboltools.datafileprovider.bundle.impl;
+
+import static org.apache.stanbol.commons.stanboltools.datafileprovider.bundle.BundleResourceProviderConstants.*;
+
+import java.util.Arrays;
+import java.util.Dictionary;
+import java.util.HashMap;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider;
+import org.apache.stanbol.commons.stanboltools.datafileprovider.bundle.BundleResourceProviderConstants;
+import org.osgi.framework.Bundle;
+import org.osgi.framework.BundleContext;
+import org.osgi.framework.BundleEvent;
+import org.osgi.framework.BundleListener;
+import org.osgi.framework.Constants;
+import org.osgi.framework.ServiceRegistration;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Creates {@link BundleDataFileProvider} instances for bundles that define
+ * the {@link BundleResourceProviderConstants#BUNDLE_DATAFILE_HEADER} property
+ * in their headers as described by the
+ * <a href="http://www.aqute.biz/Snippets/Extender">OSGi extender pattern</a>.
+ * <p>
+ * The installer registers itself as {@link BundleListener} and keeps one
+ * {@link DataFileProvider} service registration per started bundle that
+ * declares the header. All access to the internal bookkeeping map is guarded
+ * by synchronizing on that map.
+ *
+ * @author Rupert Westenthaler
+ */
+public class DataBundleInstaller implements BundleListener {
+
+    private static final Logger log = LoggerFactory.getLogger(DataBundleInstaller.class);
+
+    /**
+     * The bundle context used to listen for bundle events and to register
+     * the {@link DataFileProvider} services.
+     */
+    private final BundleContext context;
+
+    /**
+     * Contains all active bundles as key and the ServiceRegistration for the
+     * {@link BundleDataFileProvider} as value. A <code>null</code> value
+     * indicates that this bundle does not need to be processed (or that its
+     * registration is still in progress).
+     */
+    private final Map<Bundle, ServiceRegistration> activated = new HashMap<Bundle, ServiceRegistration>();
+
+    /**
+     * Creates the installer, starts listening for bundle events and processes
+     * all bundles that are already starting or active.
+     *
+     * @param context the bundle context used for listening and for registering services
+     * @throws IllegalArgumentException if the parsed context is <code>null</code>
+     */
+    public DataBundleInstaller(BundleContext context) {
+        if (context == null) {
+            throw new IllegalArgumentException("The BundleContext MUST NOT be NULL");
+        }
+        this.context = context;
+        //add the listener first so that bundles started while the already
+        //active ones are processed are not missed; duplicates are filtered
+        //by the activated map
+        this.context.addBundleListener(this);
+        //register the already active bundles
+        registerActive(this.context);
+    }
+
+    /**
+     * Uses the parsed bundle context to register the already active (and currently
+     * starting) bundles.
+     *
+     * @param context the context used to list the installed bundles
+     */
+    private void registerActive(BundleContext context) {
+        for (Bundle bundle : context.getBundles()) {
+            if ((bundle.getState() & (Bundle.STARTING | Bundle.ACTIVE)) != 0) {
+                register(bundle);
+            }
+        }
+    }
+
+    /**
+     * Registers started bundles, unregisters stopped bundles and re-registers
+     * updated bundles. All other event types are ignored.
+     *
+     * @param event the bundle event
+     */
+    @Override
+    public void bundleChanged(BundleEvent event) {
+        switch (event.getType()) {
+            case BundleEvent.STARTED:
+                register(event.getBundle());
+                break;
+
+            case BundleEvent.STOPPED:
+                unregister(event.getBundle());
+                break;
+
+            case BundleEvent.UPDATED:
+                unregister(event.getBundle());
+                register(event.getBundle());
+                break;
+
+            default:
+                //other event types are not of interest
+                break;
+        }
+    }
+
+    /**
+     * Registers the bundle to the {@link #activated} map. If the bundle
+     * defines the {@link BundleResourceProviderConstants#BUNDLE_DATAFILE_HEADER}
+     * header a {@link BundleDataFileProvider} is created for the configured
+     * paths and registered as {@link DataFileProvider} service.
+     *
+     * @param bundle the bundle to register
+     */
+    private void register(Bundle bundle) {
+        synchronized (activated) {
+            if (activated.containsKey(bundle)) {
+                return;
+            }
+            //for now put the bundle with a null value to avoid duplicate adding
+            activated.put(bundle, null);
+        }
+        log.debug("Register Bundle {} with DataBundleInstaller",bundle.getSymbolicName());
+        //Bundle#getHeaders() returns a raw Dictionary in the used OSGi API
+        @SuppressWarnings("unchecked")
+        Dictionary<String, Object> headers = (Dictionary<String, Object>) bundle.getHeaders();
+        String pathsString = (String) headers.get(BUNDLE_DATAFILE_HEADER);
+        if(pathsString != null){
+            Dictionary<String,Object> properties = new Hashtable<String,Object>();
+            String dataFilesRankingString = (String) headers.get(BUNDLE_DATAFILES_PRIORITY_HEADER);
+            if(dataFilesRankingString != null){
+                try {
+                    properties.put(Constants.SERVICE_RANKING, Integer.valueOf(dataFilesRankingString));
+                } catch (NumberFormatException e) {
+                    log.warn("Unable to parse integer value for '{}' from the configured value '{}'. " +
+                            "Will use default ranking",
+                        BUNDLE_DATAFILES_PRIORITY_HEADER,dataFilesRankingString);
+                }
+            } //else no service ranking
+            //split on commas and drop the whitespace around them, so that
+            //"a, b" results in the paths "a" and "b" (and not " b")
+            List<String> paths = Arrays.asList(pathsString.trim().split("\\s*,\\s*"));
+            BundleDataFileProvider provider = new BundleDataFileProvider(bundle, paths);
+            properties.put(Constants.SERVICE_DESCRIPTION, String.format(
+                "%s for Bundle %s and Paths %s", 
+                BundleDataFileProvider.class.getSimpleName(),bundle.getSymbolicName(),
+                provider.getSearchPaths()));
+            ServiceRegistration registration = context.registerService(
+                DataFileProvider.class.getName(),provider, properties);
+            //log the bundle providing the data files (not the bundle of this installer)
+            log.info("Registerd BundleResourceProvider for {} and relative paths {}",
+                bundle.getSymbolicName(),provider.getSearchPaths());
+            synchronized (activated) { //update with the registration
+                if(activated.containsKey(bundle)){
+                    activated.put(bundle, registration);
+                } else { //the bundle was deactivated (or this installer closed) in the meantime ... unregister :(
+                    registration.unregister();
+                }
+            }
+        } //else header not present ... ignore bundle!
+    }
+
+    /**
+     * Removes the bundle from the {@link #activated} map and unregisters the
+     * {@link DataFileProvider} service registered for it (if any).
+     *
+     * @param bundle the bundle to unregister
+     */
+    private void unregister(Bundle bundle) {
+        synchronized (activated) {
+            //remove returns null for unknown bundles as well as for bundles
+            //without a registered provider
+            ServiceRegistration registration = activated.remove(bundle);
+            if(registration != null){
+                log.info("Unregister BundleDataFileProvider for Bundel {}",bundle.getSymbolicName());
+                registration.unregister();
+            }
+        }
+    }
+    /**
+     * Removes the bundle listener and unregisters all
+     * {@link DataFileProvider} services registered by this installer.
+     * The {@link #activated} map is cleared afterwards, so calling this
+     * method again does not unregister the same services a second time.
+     */
+    public void close() {
+        context.removeBundleListener(this);
+        synchronized (activated) {
+            try {
+                for(Entry<Bundle,ServiceRegistration> entry : activated.entrySet()) {
+                    if(entry.getValue() != null){
+                        log.info("Unregister BundleDataFileProvider for Bundel {}",entry.getKey().getSymbolicName());
+                        entry.getValue().unregister();
+                    }
+                }
+            } finally {
+                //forget all bundles (even if an unregister call failed) so that
+                //a registration still in progress detects that it is no longer
+                //expected and releases its service
+                activated.clear();
+            }
+        }
+    }
+}

Propchange: incubator/stanbol/trunk/commons/stanboltools/bundledatafileprovider/src/main/java/org/apache/stanbol/commons/stanboltools/datafileprovider/bundle/impl/DataBundleInstaller.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain