You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2011/04/29 11:20:38 UTC

svn commit: r1097740 [4/10] - in /incubator/stanbol/trunk: entityhub/ entityhub/generic/core/src/main/java/org/apache/stanbol/entityhub/core/mapping/ entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/mapping/ entityhu...

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/MapEntityScoreProvider.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/MapEntityScoreProvider.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/MapEntityScoreProvider.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/MapEntityScoreProvider.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,71 @@
+package org.apache.stanbol.entityhub.indexing.core.source;
+
+import java.util.Map;
+
+import org.apache.stanbol.entityhub.indexing.core.EntityScoreProvider;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+
+/**
+ * Implementation of the {@link EntityScoreProvider} interface that looks up
+ * the score of an entity in a {@link Map} using the entity id as key.
+ * @author Rupert Westenthaler
+ */
+public class MapEntityScoreProvider implements EntityScoreProvider {
+    /**
+     * The map with the rankings (entity id -&gt; score). Set to
+     * <code>null</code> by {@link #close()}.
+     */
+    private Map<String,Float> rankings;
+    /**
+     * Creates a score provider that is backed by the parsed map. The map is
+     * used as is (no copy is created).<p>
+     * Note that Entities with rankings of <code>null</code> or 
+     * <code>&lt; 0</code> will not be indexed.
+     * @param rankings the map holding the rankings
+     * @throws IllegalArgumentException if the ranking map is <code>null</code>
+     * or empty.
+     */
+    public MapEntityScoreProvider(Map<String,Float> rankings) throws IllegalArgumentException{
+        if(rankings == null || rankings.isEmpty()){
+            throw new IllegalArgumentException("The map with the rankings MUST NOT be NULL or empty");
+        }
+        this.rankings = rankings;
+    }
+    /**
+     * Not supported, because the rankings are parsed to the constructor.
+     * @throws UnsupportedOperationException on every call
+     */
+    @Override
+    public void setConfiguration(Map<String,Object> config) {
+        throw new UnsupportedOperationException("Map based configuration is not supported by this implementation!");
+    }
+    /**
+     * @return always <code>false</code>, the map is ready to use after construction
+     */
+    @Override
+    public boolean needsInitialisation() {
+        return false;
+    }
+    @Override
+    public void initialise() {
+        // nothing to do
+    }
+    /**
+     * Releases the reference to the rankings map. Calls to
+     * {@link #process(String)} after closing will fail with an
+     * {@link IllegalStateException}.
+     */
+    @Override
+    public void close() {
+        //do not remove the elements because the map might be also used by others
+        this.rankings = null;
+    }
+    /**
+     * Returns <code>false</code> because this implementation does not need the
+     * data of the Entities
+     * @see EntityScoreProvider#needsData()
+     */
+    @Override
+    public boolean needsData() {
+        return false;
+    }
+
+    /**
+     * Looks up the ranking for the parsed entity id.
+     * @param id the id of the entity
+     * @return the value stored in the map for the id or <code>null</code> if
+     * the map contains no mapping for the id
+     * @throws IllegalStateException if this provider was already closed
+     */
+    @Override
+    public Float process(String id) {
+        //read the field only once, because close() may set it to null
+        Map<String,Float> current = this.rankings;
+        if(current == null){
+            //fail with a clear message instead of a NullPointerException
+            throw new IllegalStateException("This EntityScoreProvider was already closed!");
+        }
+        return current.get(id);
+    }
+
+    /**
+     * Not supported, because scores are looked up by id only.
+     * @throws UnsupportedOperationException on every call
+     */
+    @Override
+    public Float process(Representation entity) throws UnsupportedOperationException {
+        throw new UnsupportedOperationException("This Class uses process(String id) for evaluation");
+    }
+
+}

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/MapEntityScoreProvider.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceImporter.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceImporter.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceImporter.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceImporter.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,21 @@
+package org.apache.stanbol.entityhub.indexing.core.source;
+
+import java.io.IOException;
+import java.io.InputStream;
+/**
+ * The processor used by the resource loader to load registered resources.
+ * The {@link ResourceLoader} calls {@link #importResource(InputStream, String)}
+ * once for every file and, if processing of archive entries is enabled, once
+ * for every non-directory entry of a zip/jar archive.
+ * @author Rupert Westenthaler
+ *
+ */
+public interface ResourceImporter {
+    /**
+     * Processes a resource and returns the new state for that resource
+     * @param is the stream to read the resource from
+     * @param resourceName the name of the resource. The {@link ResourceLoader}
+     * parses the name of the file (or archive entry) without the path.
+     * @return the State of the resource after the processing
+     * @throws IOException On any error while reading the resource. Throwing
+     * an IOException will set the state of the resource to
+     * {@link ResourceState#ERROR}
+     */
+    ResourceState importResource(InputStream is,String resourceName) throws IOException;
+}

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceImporter.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,255 @@
+package org.apache.stanbol.entityhub.indexing.core.source;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.Enumeration;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.Map.Entry;
+
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipFile;
+import org.apache.commons.io.FilenameUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ResourceLoader {
+    
+    private static final Logger log = LoggerFactory.getLogger(ResourceLoader.class);
+    /**
+     * The importer all registered resources are parsed to. Never <code>null</code>.
+     */
+    private final ResourceImporter resourceImporter;
+    /**
+     * The registered files (path -&gt; current state). A sorted map is used
+     * to keep the files ordered by path. Methods that iterate over or update
+     * this map while loading synchronise on it.
+     */
+    private final Map<String,ResourceState> files;
+    /**
+     * Activates/deactivates parsing of entries within ZIP archives (files
+     * with the extension "zip" or "jar"). If <code>false</code> the ZIP 
+     * archive will be parsed as a whole. If <code>true</code> the Entries of
+     * the ZIP archive will be parsed to the resource handler.
+     */
+    private boolean loadEntriesWithinZipArchives = true;
+    /**
+     * Creates a resource loader that processes the entries within archives
+     * and has no resources registered.
+     * @param resourceImporter the importer used to process the resources
+     * @throws IllegalStateException if the parsed importer is <code>null</code>
+     */
+    public ResourceLoader(ResourceImporter resourceImporter) {
+        this(resourceImporter,true,null);
+    }
+    /**
+     * Creates a resource loader that has no resources registered.
+     * @param resourceImporter the importer used to process the resources
+     * @param processEntriesWithinArchives if <code>true</code> the entries of
+     * zip/jar archives are parsed to the importer; if <code>false</code> the
+     * archive is parsed as a whole
+     * @throws IllegalStateException if the parsed importer is <code>null</code>
+     */
+    public ResourceLoader(ResourceImporter resourceImporter, boolean processEntriesWithinArchives) {
+        this(resourceImporter,processEntriesWithinArchives,null);
+    }
+    /**
+     * Creates a resource loader and registers the parsed file or directory.
+     * @param resourceImporter the importer used to process the resources
+     * @param processEntriesWithinArchives if <code>true</code> the entries of
+     * zip/jar archives are parsed to the importer; if <code>false</code> the
+     * archive is parsed as a whole
+     * @param fileOrDirectory the initial file/directory to register or
+     * <code>null</code> to start without registered resources
+     * @throws IllegalStateException if the parsed importer is <code>null</code>
+     */
+    public ResourceLoader(ResourceImporter resourceImporter, boolean processEntriesWithinArchives,File fileOrDirectory) {
+        // NOTE(review): a null argument is reported with an IllegalStateException
+        // and the message refers to a "ResourceProcessor" - confirm if this is intended
+        if(resourceImporter == null){
+            throw new IllegalStateException("The parsed ResourceProcessor instance MUST NOT be NULL!");
+        }
+        this.resourceImporter = resourceImporter;
+        this.loadEntriesWithinZipArchives = processEntriesWithinArchives;
+        //use a tree map to have the files sorted
+        this.files = new TreeMap<String,ResourceState>();
+        //addResource ignores null, so no check is needed here
+        addResource(fileOrDirectory);
+    }
+
+    /**
+     * Adds a new {@link File} resource to this resource loader. In case a
+     * directory is parsed, all files directly within this directory will be 
+     * also added. Note that hidden Files are ignored.<p>
+     * Files not yet known are registered with the state
+     * {@link ResourceState#REGISTERED}. Files that are in the state
+     * {@link ResourceState#ERROR} are reset to {@link ResourceState#REGISTERED}
+     * so that they are loaded again. Files in any other state are not changed.
+     * @param fileOrDirectory the file/directory to add. <code>null</code> is
+     * ignored.
+     */
+    public void addResource(File fileOrDirectory){
+        if(fileOrDirectory == null){
+            return;
+        }
+        //collect the files outside of the lock to keep it short
+        Collection<String> toAdd = getFiles(fileOrDirectory);
+        //sync on files as the methods used for loading do, because resources
+        //may be added while loading
+        synchronized (files) {
+            for(String file : toAdd){
+                ResourceState state = files.get(file);
+                if(state == null){
+                    log.debug("File {} registered to this RdfLoader",file);
+                    files.put(file, ResourceState.REGISTERED);
+                } else if(state == ResourceState.ERROR){
+                    log.info("Readding file {} after previous error while loading",file);
+                    //reset the state, otherwise the file would never be loaded again
+                    files.put(file, ResourceState.REGISTERED);
+                } else {
+                    log.info("Ignore file {} because it already present with state {}",file,state);
+                }
+            }
+        }
+    }
+    /**
+     * Provides read only access to the states of all resources known to
+     * this resource loader.
+     * @return an unmodifiable view over the file -&gt; state mappings
+     */
+    public Map<String,ResourceState> getResourceStates(){
+        final Map<String,ResourceState> readOnlyView = Collections.unmodifiableMap(files);
+        return readOnlyView;
+    }
+    /**
+     * Collects the resources that currently have the parsed state. The
+     * returned collection is a copy; changing it does not affect this loader.
+     * @param state the processing state
+     * @return A copy of all resources in the parsed state. An empty set if
+     * the parsed state is <code>null</code>
+     */
+    public Collection<String> getResources(ResourceState state){
+        if(state != null){
+            //delegate to the Set based variant
+            return getResources(EnumSet.of(state));
+        }
+        return Collections.emptySet();
+    }
+    /**
+     * Collects the resources that currently have one of the parsed states.
+     * The returned collection is a copy; changing it does not affect this
+     * loader.
+     * @param states the processing states
+     * @return A copy of all resources in one of the parsed states. An empty
+     * set if the parsed set is <code>null</code>
+     */
+    public Collection<String> getResources(Set<ResourceState> states){
+        if(states == null){
+            return Collections.emptySet();
+        }
+        final Collection<String> matching = new HashSet<String>();
+        //hold the lock on the map while iterating over its entries
+        synchronized (files) {
+            for(Entry<String,ResourceState> registered : files.entrySet()){
+                ResourceState current = registered.getValue();
+                if(!states.contains(current)){
+                    continue;
+                }
+                matching.add(registered.getKey());
+            }
+        }
+        return matching;
+    }
+    /**
+     * Loads all resources that are in the {@link ResourceState#REGISTERED}
+     * state. Resources registered while loading are picked up by a further
+     * pass; the method returns as soon as no registered resource is left.
+     */
+    public void loadResources(){
+        //loop to support adding of new files while loading
+        Collection<String> fileToLoad = getResources(ResourceState.REGISTERED);
+        while(!fileToLoad.isEmpty()){ //do not log/process empty passes
+            long start=System.currentTimeMillis();
+            log.info("Loading RDF {} File{} ...",fileToLoad.size(),fileToLoad.size()>1?"s":"");
+            for (String file : fileToLoad) {
+                loadResource(file);
+            }
+            log.info(" ... {} files imported in {} seconds", 
+                fileToLoad.size(),(System.currentTimeMillis()-start)/1000);
+            //check if files were registered in the meantime
+            fileToLoad = getResources(ResourceState.REGISTERED);
+        }
+    }
+    /**
+     * Loads a resource from a file. The file is only processed if it is in
+     * the {@link ResourceState#REGISTERED} state; it is set to
+     * {@link ResourceState#LOADING} while it is processed.<p>
+     * If processing of archive entries is enabled and the file has the
+     * extension "zip" or "jar", every non-directory entry is parsed to the
+     * {@link ResourceImporter} and the archive is set to
+     * {@link ResourceState#ERROR} if at least one entry failed (otherwise to
+     * {@link ResourceState#LOADED}). All other files are parsed as a whole and
+     * get the state returned by the importer.<p>
+     * All streams and archives opened by this method are closed before it
+     * returns.
+     * @param file the file resource
+     */
+    private void loadResource(String file) {
+        synchronized (files) { 
+            //sync to files to avoid two threads loading the same file
+            ResourceState state = files.get(file);
+            if(state == null || state != ResourceState.REGISTERED){
+                log.info("Do not load File {} because of its state {} (null means removed from list)",
+                    file,state);
+                return; //someone removed it in between
+            } else { //set to loading
+                setResourceState(file, ResourceState.LOADING, null);
+            }
+        }
+        long startFile = System.currentTimeMillis();
+        log.info(" > loading '{}' ...", file);
+        String extension = FilenameUtils.getExtension(file);
+        if(loadEntriesWithinZipArchives && (
+                "zip".equalsIgnoreCase(extension) ||
+                "jar".equalsIgnoreCase(extension))){
+            log.info("  - processing {}-archive entries:",extension);
+            ZipFile zipArchive;
+            try {
+                zipArchive = new ZipFile(file);
+            } catch (IOException e) {
+                zipArchive = null;
+                setResourceState(file, ResourceState.ERROR,e);
+            }
+            if(zipArchive != null){
+                boolean isError = false;
+                try {
+                    Enumeration<ZipArchiveEntry> entries = zipArchive.getEntries();
+                    while(entries.hasMoreElements()){
+                        ZipArchiveEntry entry = entries.nextElement();
+                        if(!entry.isDirectory()){
+                            String entryName = entry.getName();
+                            log.info("     o loading entry '{}'", entryName);
+                            InputStream is = null;
+                            try {
+                                is = zipArchive.getInputStream(entry);
+                                ResourceState state = resourceImporter.importResource(
+                                    is, FilenameUtils.getName(entryName));
+                                if(state == ResourceState.ERROR){
+                                    isError = true;
+                                }
+                            } catch (IOException e) {
+                                //do not swallow the cause of the error
+                                log.error("Exception while loading entry "+entryName
+                                    +" of archive "+file,e);
+                                isError = true;
+                            } finally {
+                                closeQuietly(is);
+                            }
+                        }
+                    }
+                } finally {
+                    //release the file handle of the archive
+                    try {
+                        zipArchive.close();
+                    } catch (IOException e) {
+                        log.warn("Unable to close archive "+file,e);
+                    }
+                }
+                //set the state for the Archive as a whole
+                setResourceState(file, 
+                    isError ? ResourceState.ERROR : ResourceState.LOADED, null);
+            }
+        } else {
+            InputStream is = null;
+            try {
+                is = new FileInputStream(file);
+                ResourceState state = resourceImporter.importResource(is,
+                    FilenameUtils.getName(file));
+                setResourceState(file, state, null);
+            } catch (FileNotFoundException e) {
+                //during init it is checked that files exists and are files 
+                //and there is read access so this can only happen if
+                //someone deletes the file in between
+                setResourceState(file, ResourceState.ERROR, e);
+            } catch (IOException e) {
+                setResourceState(file, ResourceState.ERROR, e);
+            } finally {
+                closeQuietly(is);
+            }
+        }
+        log.info("   - completed in {} seconds", 
+            (System.currentTimeMillis()-startFile)/1000);
+    }
+    /**
+     * Closes the parsed stream. Exceptions while closing are logged but not
+     * propagated, because they must not change the result of the import.
+     * @param in the stream to close or <code>null</code>
+     */
+    private static void closeQuietly(InputStream in){
+        if(in != null){
+            try {
+                in.close();
+            } catch (IOException e) {
+                log.debug("Unable to close stream",e);
+            }
+        }
+    }
+    /**
+     * Getter for the files based on a parsed File or Directory. Hidden Files
+     * are ignored. Does not search recursively to the directory structure!
+     * @param fileOrDir The file or directory
+     * @return the paths of the files found based on the parameter. An empty
+     * collection if the parameter is <code>null</code>, hidden, does not
+     * exist or is a directory that can not be listed.
+     */
+    private static Collection<String> getFiles(File fileOrDir){
+        if(fileOrDir == null){
+            return Collections.emptySet();
+        } else if(fileOrDir.isHidden()){
+            return Collections.emptySet();
+        } else if(fileOrDir.isFile()){
+            return Collections.singleton(fileOrDir.getPath());
+        } else if(fileOrDir.isDirectory()){
+            File[] children = fileOrDir.listFiles();
+            if(children == null){
+                //listFiles returns null on I/O errors (e.g. missing read access)
+                log.warn("Unable to list the files of directory {}",fileOrDir);
+                return Collections.emptySet();
+            }
+            Collection<String> files = new ArrayList<String>();
+            for(File file : children){
+                if(file.isFile() && !file.isHidden()){
+                    //the path of files returned by listFiles already includes 
+                    //the directory. Concatenating it with the directory again
+                    //would duplicate the directory for relative paths
+                    files.add(file.getPath());
+                }
+            }
+            return files;
+        } else { //file does not exist
+            return Collections.emptySet();
+        }
+    }
+    /**
+     * Sets the state of a registered file. If an Exception is parsed it is
+     * logged first. Files that are not (or no longer) registered with this
+     * loader are not added; the call is only logged.
+     * @param file the affected file. Calls with <code>null</code> are ignored
+     * @param state the new state of the file
+     * @param e the Exception to log or <code>null</code> if there is none
+     * @throws IllegalArgumentException if the parsed state is <code>null</code>
+     */
+    private void setResourceState(String file, ResourceState state,Exception e) {
+        if(e != null){
+            log.error("Exception while loading file "+file,e);
+        }
+        //null states are rejected so that the map never contains null values
+        if(state == null){
+            throw new IllegalArgumentException("The parsed ProcessingState MUST NOT be NULL!");
+        }
+        if(file != null){ //calls without a file are silently ignored
+            synchronized (files) {
+                boolean registered = files.containsKey(file);
+                if(!registered){
+                    log.info("Ignore Error for File {} because it is no longer registered with this RdfLoader",
+                        file);
+                } else {
+                    log.debug("File {} now in state {}",file,state);
+                    files.put(file, state);
+                }
+            }
+        }
+    }
+}

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceState.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceState.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceState.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceState.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,32 @@
+/**
+ * 
+ */
+package org.apache.stanbol.entityhub.indexing.core.source;
+
+/**
+ * State of resources managed by the ResourceLoader
+ * @author Rupert Westenthaler
+ *
+ */
+public enum ResourceState {
+    /**
+     * Resources that are registered but not yet processed
+     */
+    REGISTERED,
+    /**
+     * Resources that are currently processed
+     */
+    LOADING,
+    /**
+     * Resources that were successfully loaded
+     */
+    LOADED,
+    /**
+     * Resources that were ignored
+     */
+    IGNORED,
+    /**
+     * Indicates an Error while processing a resource
+     */
+    ERROR
+}
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceState.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,84 @@
+package org.apache.stanbol.entityhub.indexing.core;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
+import org.apache.stanbol.entityhub.indexing.core.normaliser.MinScoreNormalizer;
+import org.apache.stanbol.entityhub.indexing.core.normaliser.NaturalLogNormaliser;
+import org.apache.stanbol.entityhub.indexing.core.normaliser.RangeNormaliser;
+import org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser;
+import org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator;
+
+import static org.junit.Assert.*;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Tests the loading of {@link IndexingConfig}s from the test configurations
+ * copied by mvn to the "testConfigs" folder.
+ */
+public class ConfigTest {
+    private static final Logger log = LoggerFactory.getLogger(ConfigTest.class);
+    /**
+     * mvn copies the resources in "src/test/resources" to target/test-classes
+     */
+    private static final String TEST_CONFIGS_ROOT = "/target/test-classes/testConfigs/";
+    /**
+     * The absolute root folder of the test configurations (initialised by
+     * {@link #initTestRootFolder()})
+     */
+    private static String testRoot;
+    /**
+     * Determines the root folder of the test configurations based on the
+     * "basedir" system property and falls back to "user.dir" if it is not set.
+     */
+    @BeforeClass
+    public static void initTestRootFolder(){
+        String baseDir = System.getProperty("basedir");
+        if(baseDir == null){
+            baseDir = System.getProperty("user.dir");
+        }
+        testRoot = baseDir+TEST_CONFIGS_ROOT;
+        log.info("ConfigTest Root ={}",testRoot);
+    }
+    @Test(expected=IllegalArgumentException.class)
+    public void missingRoot(){
+        new IndexingConfig(); //there is no indexing folder in the user.dir
+    }
+    @Test(expected=IllegalArgumentException.class)
+    public void missingConfigDir(){
+        new IndexingConfig(testRoot+"missingconfig");
+    }
+    /**
+     * Loads the "simple" configuration and checks the name, the description,
+     * the chain of normalisers, the entity id iterator (including the
+     * normalised scores) and the presence of an entity processor.
+     */
+    @Test
+    public void loadSimpleConfigDir(){
+        IndexingConfig config = new IndexingConfig(testRoot+"simple");
+        //test the name (NOTE: assertEquals expects the expected value first)
+        assertEquals("simple",config.getName());
+        assertEquals("Simple Configuration",config.getDescription());
+        //test if the normaliser configuration was parsed correctly!
+        final ScoreNormaliser normaliser = config.getNormaliser();
+        ScoreNormaliser testNormaliser = normaliser;
+        assertNotNull(testNormaliser);
+        assertEquals(RangeNormaliser.class,testNormaliser.getClass());
+        testNormaliser = testNormaliser.getChained();
+        assertNotNull(testNormaliser);
+        assertEquals(NaturalLogNormaliser.class,testNormaliser.getClass());
+        testNormaliser = testNormaliser.getChained();
+        assertNotNull(testNormaliser);
+        assertEquals(MinScoreNormalizer.class,testNormaliser.getClass());
+        EntityIterator entityIterator = config.getEntityIdIterator();
+        assertNotNull(entityIterator);
+        assertEquals(LineBasedEntityIterator.class,entityIterator.getClass());
+        Map<String,Float> entityIds = new HashMap<String,Float>();
+        //the values test if the normaliser configuration was read correctly
+        //the keys if the configured entityScore file was configured correctly
+        float boost = 10f/(float)Math.log1p(100);
+        entityIds.put("http://www.example.org/entity/test", Float.valueOf(10));
+        entityIds.put("http://www.example.org/entity/test2", Float.valueOf((float)(Math.log1p(10)*boost)));
+        entityIds.put("http://www.example.org/entity/test3", Float.valueOf(-1));
+        while(entityIterator.hasNext()){
+            EntityIterator.EntityScore entityScore = entityIterator.next();
+            //remove the id so that remaining entries indicate missing entities
+            Float expectedScore = entityIds.remove(entityScore.id);
+            assertNotNull("Entity with ID "+entityScore.id+" not found!",expectedScore);
+            Float score = normaliser.normalise(entityScore.score);
+            assertTrue("Entity score "+score+" is not the expected "+expectedScore,expectedScore.compareTo(score)==0);
+        }
+        assertTrue(entityIds.isEmpty());
+        EntityProcessor processor = config.getEntityProcessor();
+        assertNotNull(processor);
+    }
+    
+
+}

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityDataSource.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityDataSource.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityDataSource.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityDataSource.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,67 @@
+/**
+ * 
+ */
+package org.apache.stanbol.entityhub.indexing.core;
+
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+
+/**
+ * Test double that acts as {@link EntityDataIterable} as well as
+ * {@link EntityDataProvider}. Both views are served directly from the
+ * entity data held in {@link IndexerTest#testData}
+ * @author Rupert Westenthaler
+ *
+ */
+public class DummyEntityDataSource implements EntityDataIterable, EntityDataProvider {
+
+    /**
+     * Looks up the entity data for the parsed id in the test data
+     */
+    @Override
+    public Representation getEntityData(String id) {
+        return IndexerTest.testData.get(id);
+    }
+
+    /**
+     * Creates an iterator over all representations of the test data
+     */
+    @Override
+    public EntityDataIterator entityDataIterator() {
+        return new EntityDataIterator() {
+            /** iterates over the values of the test data */
+            Iterator<Representation> representations = IndexerTest.testData.values().iterator();
+            /** the representation returned by the last call to next() */
+            Representation lastReturned = null;
+            @Override
+            public boolean hasNext() {
+                return representations.hasNext();
+            }
+            @Override
+            public String next() {
+                lastReturned = representations.next();
+                return lastReturned.getId();
+            }
+            @Override
+            public Representation getRepresentation() {
+                return lastReturned;
+            }
+            @Override
+            public void remove() {
+                throw new UnsupportedOperationException();
+            }
+            @Override
+            public void close() {
+                //nothing to release
+            }
+        };
+    }
+
+    @Override
+    public void setConfiguration(Map<String,Object> config) {
+        //no configuration supported
+    }
+
+    @Override
+    public boolean needsInitialisation() {
+        return false;
+    }
+
+    @Override
+    public void initialise() {
+        //nothing to initialise
+    }
+
+    @Override
+    public void close() {
+        //nothing to release
+    }
+}
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityDataSource.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityIdSource.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityIdSource.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityIdSource.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityIdSource.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,53 @@
+/**
+ * 
+ */
+package org.apache.stanbol.entityhub.indexing.core;
+
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+
+/**
+ * Test double for an {@link EntityIterator}. The ids and scores are taken
+ * from the representations held in {@link IndexerTest#testData}
+ * @author Rupert Westenthaler
+ *
+ */
+public class DummyEntityIdSource implements EntityIterator {
+    /**
+     * Iterates over the representations of the test data
+     */
+    private Iterator<Representation> representations = IndexerTest.testData.values().iterator();
+
+    @Override
+    public void setConfiguration(Map<String,Object> config) {
+        //no configuration supported
+    }
+
+    @Override
+    public boolean needsInitialisation() {
+        return false;
+    }
+
+    @Override
+    public void initialise() {
+        //nothing to initialise
+    }
+
+    @Override
+    public boolean hasNext() {
+        return representations.hasNext();
+    }
+
+    /**
+     * Creates the {@link EntityScore} for the next representation. The score
+     * is read from the {@link RdfResourceEnum#signRank} field; <code>0</code>
+     * is used if the representation has no such value.
+     */
+    @Override
+    public EntityScore next() {
+        Representation representation = representations.next();
+        Number rank = representation.getFirst(RdfResourceEnum.signRank.getUri(), Number.class);
+        String id = representation.getId();
+        float score;
+        if(rank != null){
+            score = rank.floatValue();
+        } else {
+            score = 0;
+        }
+        return new EntityScore(id, score);
+    }
+
+    @Override
+    public void remove() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void close() {
+        //nothing to release
+    }
+}
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityIdSource.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityScoreSource.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityScoreSource.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityScoreSource.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityScoreSource.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,52 @@
+/**
+ * 
+ */
+package org.apache.stanbol.entityhub.indexing.core;
+
+import java.util.Map;
+
+import org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+
+/**
+ * Test double for an {@link EntityScoreProvider}. The score is read from the
+ * {@link RdfResourceEnum#signRank} field of the parsed entity, so the data of
+ * the entities are required.
+ * @author Rupert Westenthaler
+ *
+ */
+public class DummyEntityScoreSource implements EntityScoreProvider {
+
+    @Override
+    public void setConfiguration(Map<String,Object> config) {
+        //no configuration supported
+    }
+
+    @Override
+    public boolean needsInitialisation() {
+        return false;
+    }
+
+    @Override
+    public void initialise() {
+        //nothing to initialise
+    }
+
+    /**
+     * @return always <code>true</code>, because the score is read from the
+     * entity data
+     */
+    @Override
+    public boolean needsData() {
+        return true;
+    }
+
+    /**
+     * Reads the score from the sign rank field of the parsed entity
+     */
+    @Override
+    public Float process(Representation entity) throws UnsupportedOperationException {
+        final String rankField = RdfResourceEnum.signRank.getUri();
+        return entity.getFirst(rankField, Float.class);
+    }
+
+    /**
+     * Not supported, because this implementation needs the entity data
+     */
+    @Override
+    public Float process(String id) throws UnsupportedOperationException {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void close() {
+        //nothing to release
+    }
+}
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityScoreSource.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyIndexingDestination.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyIndexingDestination.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyIndexingDestination.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyIndexingDestination.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,170 @@
+/**
+ * 
+ */
+package org.apache.stanbol.entityhub.indexing.core;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Map;
+
+import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
+import org.apache.stanbol.entityhub.core.query.DefaultQueryFactory;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
+import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
+import org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory;
+import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
+import org.apache.stanbol.entityhub.servicesapi.yard.Yard;
+import org.apache.stanbol.entityhub.servicesapi.yard.YardException;
+/**
+ * Dummy implementation of an {@link IndexingDestination} that writes results
+ * directly into {@link IndexerTest#indexedData}.<p>
+ * All data handling is done by the {@link Yard} returned by {@link #getYard()};
+ * the life cycle methods of the destination itself do nothing.
+ * @author Rupert Westenthaler
+ *
+ */
+public class DummyIndexingDestination implements IndexingDestination {
+
+    /**
+     * {@link Yard} implementation backed by {@link IndexerTest#indexedData}.
+     * The three query methods are not implemented and throw an
+     * {@link UnsupportedOperationException}.
+     */
+    Yard yard = new Yard() {
+        
+        /**
+         * Calls {@link #update(Representation)} for every parsed element.
+         * Elements rejected with an {@link IllegalArgumentException} do not
+         * abort the batch; a <code>null</code> is added to the result for them.
+         */
+        @Override
+        public Iterable<Representation> update(Iterable<Representation> representations) throws YardException, IllegalArgumentException {
+            Collection<Representation> updated = new ArrayList<Representation>();
+            for(Representation rep : representations){
+                try {
+                    updated.add(update(rep));
+                }catch(IllegalArgumentException e){
+                    //this representation is not present in the store
+                    updated.add(null);
+                }
+            }
+            return updated;
+        }
+        /**
+         * Replaces an already stored representation.
+         * @param representation the new state; <code>null</code> is returned
+         * unchanged
+         * @return the parsed representation
+         * @throws IllegalArgumentException if no representation with the same
+         * id is present in {@link IndexerTest#indexedData}
+         */
+        @Override
+        public Representation update(Representation representation) throws YardException, IllegalArgumentException {
+            if(representation == null){
+                return representation;
+            }
+            if(IndexerTest.indexedData.containsKey(representation.getId())){
+                IndexerTest.indexedData.put(representation.getId(), representation);
+            } else {
+                throw new IllegalArgumentException("Representation "+representation.getId()+" not present in store");
+            }
+            return representation;
+        }
+        
+        /**
+         * Calls {@link #store(Representation)} for every parsed element and
+         * returns the parsed {@link Iterable}.
+         */
+        @Override
+        public Iterable<Representation> store(Iterable<Representation> representations) throws NullPointerException,
+                                                                                       YardException {
+            for(Representation rep : representations){
+                store(rep);
+            }
+            return representations;
+        }
+        
+        /**
+         * Puts the representation into {@link IndexerTest#indexedData} using
+         * its id as key. <code>null</code> values are ignored.
+         * @return the parsed representation
+         */
+        @Override
+        public Representation store(Representation representation) throws NullPointerException, YardException {
+            if(representation != null){
+                IndexerTest.indexedData.put(representation.getId(), representation);
+            }
+            return representation;
+        }
+        
+        @Override
+        public void remove(Iterable<String> ids) throws IllegalArgumentException, YardException {
+            for(String id :ids){
+                remove(id);
+            }
+        }
+        
+        @Override
+        public void remove(String id) throws IllegalArgumentException, YardException {
+            IndexerTest.indexedData.remove(id);
+        }
+        
+        @Override
+        public boolean isRepresentation(String id) throws YardException, IllegalArgumentException {
+            return IndexerTest.indexedData.containsKey(id);
+        }
+        
+        @Override
+        public ValueFactory getValueFactory() {
+            return InMemoryValueFactory.getInstance();
+        }
+        
+        /**
+         * @return the stored representation or <code>null</code> if
+         * {@link IndexerTest#indexedData} holds no entry for the id
+         */
+        @Override
+        public Representation getRepresentation(String id) throws YardException, IllegalArgumentException {
+            return IndexerTest.indexedData.get(id);
+        }
+        
+        @Override
+        public FieldQueryFactory getQueryFactory() {
+            return DefaultQueryFactory.getInstance();
+        }
+        
+        /**
+         * @return a fixed name (replaces the generated stub that returned
+         * <code>null</code>)
+         */
+        @Override
+        public String getName() {
+            return "Dummy Yard";
+        }
+        
+        @Override
+        public String getId() {
+            return "dummyYard";
+        }
+        
+        @Override
+        public String getDescription() {
+            return "Dummy Implementation of the Yard interface for unit testing";
+        }
+        
+        @Override
+        public QueryResultList<Representation> findRepresentation(FieldQuery query) throws YardException, IllegalArgumentException {
+            throw new UnsupportedOperationException("I think this is not needed for testing");
+        }
+        
+        @Override
+        public QueryResultList<String> findReferences(FieldQuery query) throws YardException, IllegalArgumentException {
+            throw new UnsupportedOperationException("I think this is not needed for testing");
+        }
+        
+        @Override
+        public QueryResultList<Representation> find(FieldQuery query) throws YardException, IllegalArgumentException {
+            throw new UnsupportedOperationException("I think this is not needed for testing");
+        }
+        
+        /**
+         * Creates a new, empty representation for the parsed id. The created
+         * instance is not added to {@link IndexerTest#indexedData}.
+         */
+        @Override
+        public Representation create(String id) throws IllegalArgumentException, YardException {
+            return InMemoryValueFactory.getInstance().createRepresentation(id);
+        }
+        
+        /**
+         * Creates a new, empty representation with a generated id built from
+         * the current time and a random number. The created instance is not
+         * added to {@link IndexerTest#indexedData}.
+         */
+        @Override
+        public Representation create() throws YardException {
+            return InMemoryValueFactory.getInstance().createRepresentation("urn:"+System.currentTimeMillis()+"-"+Math.random());
+        }
+    };
+    @Override
+    public void finalise() {
+    }
+
+    /**
+     * @return the {@link Yard} writing to {@link IndexerTest#indexedData}
+     */
+    @Override
+    public Yard getYard() {
+        return yard;
+    }
+
+    @Override
+    public void close() {
+    }
+
+    @Override
+    public void initialise() {
+    }
+
+    /**
+     * @return always <code>false</code>, because {@link #initialise()} does
+     * nothing
+     */
+    @Override
+    public boolean needsInitialisation() {
+        return false;
+    }
+
+    /**
+     * Ignores the parsed configuration.
+     */
+    @Override
+    public void setConfiguration(Map<String,Object> config) {
+    }
+    
+}
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyIndexingDestination.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,211 @@
+package org.apache.stanbol.entityhub.indexing.core;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.Map.Entry;
+
+import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
+import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
+import org.apache.stanbol.entityhub.servicesapi.model.Reference;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.Text;
+import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
+import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+import org.junit.After;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import static org.junit.Assert.*;
+
+
+/**
+ * Tests the {@link Indexer} instances created by the {@link IndexerFactory}
+ * for the "dataIterating" and the "idIterating" test configurations.<p>
+ * The source entities are generated by a static initialiser and kept in
+ * {@link #testData}; the results of the indexing process are expected in
+ * {@link #indexedData}.
+ */
+public class IndexerTest {
+    
+    /**
+     * The number of Entities added to the {@link #testData}<p>
+     * Should be &gt; 100000 to test printing of the Indexing statistics after
+     * 100000 entities.<p>
+     * Note that the source and the indexed entities are kept in memory!
+     */
+    private static final int NUM_ENTITIES = 101000;
+    
+    /**
+     * Holds the test data as defined by a static{} block
+     */
+    protected static final Map<String,Representation> testData = new HashMap<String,Representation>();
+    /**
+     * Holds the results of the indexing process. Cleared after each test by
+     * {@link #cleanIndexed()}.
+     */
+    protected static final Map<String,Representation> indexedData = new HashMap<String,Representation>();
+    /**
+     * mvn copies the resources in "src/test/resources" to target/test-classes
+     */
+    private static final String TEST_FOLDER_NAME = "/target/test-classes/indexerTests/";
+    protected static Logger log = LoggerFactory.getLogger(IndexerTest.class);
+    private static String rootDir;
+    private static IndexerFactory factory;
+    
+    private static final String DC_TITLE = NamespaceEnum.dcTerms+"title";
+    private static final String DC_CREATED = NamespaceEnum.dcTerms+"created";
+    private static final String DC_CREATOR = NamespaceEnum.dcTerms+"creator";
+    private static final String RDF_TYPE = NamespaceEnum.rdf+"type";
+    private static final String ENTITY_RANK = RdfResourceEnum.signRank.getUri();
+    private static final Set<String> EXPECTED_LANGUAGES = Collections.unmodifiableSet(
+        new HashSet<String>(Arrays.asList("en","de")));
+    /**
+     * Upper bound for the rank of indexed entities (lower bound is 0)
+     */
+    private static final float EXPECTED_MAX_RANK = 100;
+    /**
+     * Upper bound of the random score assigned to the generated test entities
+     */
+    private static final float MAX_INCOMMING = 10000;
+
+    /**
+     * Determines the directory holding the test configurations (based on the
+     * "basedir" system property with "user.dir" as fallback) and obtains the
+     * {@link IndexerFactory}.
+     */
+    @BeforeClass
+    public static void init(){
+        String baseDir = System.getProperty("basedir");
+        if(baseDir == null){
+            baseDir = System.getProperty("user.dir");
+        }
+        rootDir = baseDir+TEST_FOLDER_NAME;
+        factory = IndexerFactory.getInstance();
+    }
+    @After
+    public void cleanIndexed(){
+        indexedData.clear();
+    }
+    
+    @Test
+    public void testDataInteratingMode(){
+        Indexer indexer = factory.create(rootDir+"dataIterating");
+        indexer.index();
+        //check that all entities have been indexed
+        validateAllIndexed();
+    }
+    @Test
+    public void testEntityIdIteratingMode(){
+        Indexer indexer = factory.create(rootDir+"idIterating");
+        indexer.index();
+        //check that all entities have been indexed
+        validateAllIndexed();
+        
+    }
+    /**
+     * Validates all the indexed resources and the average of their ranks.<p>
+     * NOTE: The asserts expect a specific configuration as provided by the
+     * directory used to create the {@link IndexerFactory} used to initialise
+     * the test.
+     */
+    private void validateAllIndexed() {
+        assertEquals("Number of Indexed Entities "+indexedData.size()+
+            "!= the Number of Source Entities "+NUM_ENTITIES,
+            NUM_ENTITIES,indexedData.size());
+        log.info("Validate Indexing Results:");
+        float maxRank = 0;
+        float minRank = EXPECTED_MAX_RANK;
+        double rankSum = 0;
+        for(Entry<String,Representation> entry : indexedData.entrySet()){
+            assertEquals(entry.getKey(), entry.getValue().getId());
+            //validateIndexed(..) asserts that the rank is within [0..EXPECTED_MAX_RANK]
+            float rank = validateIndexed(entry.getValue());
+            maxRank = Math.max(maxRank, rank);
+            minRank = Math.min(minRank, rank);
+            rankSum += rank;
+        }
+        log.info("Entity Rank:");
+        log.info(String.format(" - maximum %8.5f",maxRank));
+        log.info(String.format(" - minimum %8.5f",minRank));
+        //NOTE(review): the expected value is computed for the mean input score
+        //(MAX_INCOMMING/2), presumably as an approximation for the mean of the
+        //normalised scores; the tolerance used below is 5% of
+        //EXPECTED_MAX_RANK (not 5% of the average) - confirm
+        double expectedAverage = Math.log1p(MAX_INCOMMING/2)*EXPECTED_MAX_RANK/Math.log1p(MAX_INCOMMING);
+        double average = rankSum/NUM_ENTITIES;
+        log.info(String.format(" - average %8.5f (expected %8.5f) ",
+            average, expectedAverage));
+        assertTrue(String.format(
+            "average score %8.5f is more than 5 precent lower than the expeded average %8.5f",
+            average,expectedAverage),
+            average > expectedAverage-(0.05*EXPECTED_MAX_RANK));
+        assertTrue(String.format(
+            "average score %8.5f is more than 5 precent higher than the expeded average %8.5f",
+            average,expectedAverage),
+            average < expectedAverage+(0.05*EXPECTED_MAX_RANK));
+    }
+    
+    
+    /**
+     * Validates a single indexed entity: the types of the values of the
+     * dc-terms and rdf:type fields, the languages of the titles and the range
+     * of the rank.
+     * @param rep the indexed entity
+     * @return the rank of the entity (within [0..{@link #EXPECTED_MAX_RANK}])
+     */
+    private float validateIndexed(Representation rep) {
+        //first check that the dc-element fields are mapped to dc-terms
+        Object value = rep.getFirst(DC_CREATOR);
+        assertTrue(value instanceof String);
+        value = rep.getFirst(DC_CREATED);
+        assertTrue(value instanceof Date);
+        for(Iterator<Object> types = rep.get(RDF_TYPE);types.hasNext();){
+            value = types.next();
+            assertTrue(value instanceof Reference);
+            assertFalse(((Reference)value).getReference().isEmpty());
+        }
+        for(Iterator<Object> titles = rep.get(DC_TITLE);titles.hasNext();){
+            value = titles.next();
+            assertTrue(value instanceof Text);
+            assertFalse(((Text)value).getText().isEmpty());
+            assertTrue(EXPECTED_LANGUAGES.contains(((Text)value).getLanguage()));
+        }
+        Float rankObject = rep.getFirst(ENTITY_RANK,Float.class);
+        assertNotNull(rankObject);
+        float rank = rankObject.floatValue();
+        assertTrue("Rank"+rank+" > expected maximum "+EXPECTED_MAX_RANK,
+            rank <= EXPECTED_MAX_RANK);
+        //this checks the lower bound, so the message must name the minimum
+        assertTrue("Rank"+rank+" < expected minimum "+0,
+            rank >= 0);
+        return rank;
+    }
+    /*
+     * Initialisation of the Test data stored in testData:
+     * every entity gets an English and a German title, one or more types
+     * (depending on the index), a creation date, a creator and a random score
+     */
+    static{
+        ValueFactory vf = InMemoryValueFactory.getInstance();
+        for(int i=0;i<NUM_ENTITIES;i++){
+            Collection<Text> names = new ArrayList<Text>();
+            Collection<Reference> types = new ArrayList<Reference>();
+            if(i%2==0){
+                if(i%5==0){
+                    names.add(vf.createText("City "+i, "en"));
+                    names.add(vf.createText("Stadt "+i,"de"));
+                    types.add(vf.createReference(NamespaceEnum.dbpediaOnt+"City"));
+                } else if(i%3==0){
+                    names.add(vf.createText("Village "+i,"en"));
+                    names.add(vf.createText("Gemeinde "+i,"de"));
+                    types.add(vf.createReference(NamespaceEnum.dbpediaOnt+"PopulatedPlace"));
+                } else {
+                    names.add(vf.createText("Location "+i, "en"));
+                    names.add(vf.createText("Platz "+i,"de"));
+                }
+                //all entities with an even index are also places
+                types.add(vf.createReference(NamespaceEnum.dbpediaOnt+"Place"));
+            } else if(i%3==0){
+                names.add(vf.createText("Person "+i,"en"));
+                names.add(vf.createText("Person "+i,"de"));
+                types.add(vf.createReference(NamespaceEnum.dbpediaOnt+"Person"));
+            } else if(i%5==0){
+                names.add(vf.createText("Organisation "+i,"en"));
+                names.add(vf.createText("Organisation "+i,"de"));
+                types.add(vf.createReference(NamespaceEnum.dbpediaOnt+"Organisation"));
+            } else {
+                names.add(vf.createText("Event "+i,"en"));
+                names.add(vf.createText("Event "+i,"de"));
+                types.add(vf.createReference(NamespaceEnum.dbpediaOnt+"Event"));
+            }
+            Representation rep = vf.createRepresentation("http://www.example.com/entity/test#entity-"+i);
+            //the dc-elements fields are validated as dc-terms fields after indexing
+            rep.add(NamespaceEnum.dcElements+"title", names);
+            rep.add(NamespaceEnum.rdf+"type", types);
+            rep.add(NamespaceEnum.dcElements+"created", new Date());
+            rep.add(NamespaceEnum.dcElements+"creator", IndexerTest.class.getSimpleName());
+            //use a random between [0..{MAX_INCOMMING}] as score
+            Integer incomming = Integer.valueOf((int)Math.round((Math.random()*MAX_INCOMMING)));
+            rep.add(RdfResourceEnum.signRank.getUri(), incomming);
+            testData.put(rep.getId(), rep);
+        }
+    }
+}

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,175 @@
+package org.apache.stanbol.entityhub.indexing.core;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.stanbol.entityhub.indexing.core.source.ResourceImporter;
+import org.apache.stanbol.entityhub.indexing.core.source.ResourceLoader;
+import org.apache.stanbol.entityhub.indexing.core.source.ResourceState;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import static org.junit.Assert.*;
+
+/**
+ * Tests the {@link ResourceLoader} for a single file and for a folder, the
+ * latter with and without processing the entries of archives.<p>
+ * The tests use the {@link DummyResourceImporter} to check the names of the
+ * resources sent to the {@link ResourceImporter}.
+ */
+public class ResourceLoaderTest {
+    /**
+     * mvn copies the resources in "src/test/resources" to target/test-classes
+     */
+    private static final String TEST_CONFIGS_ROOT = "/target/test-classes/resourceLoaderTest/";
+    private static final String TEST_FOLDER_NAME = "testFolder/";
+    protected static Logger log = LoggerFactory.getLogger(ResourceLoaderTest.class);
+    private static String rootDir;
+
+    /**
+     * {@link ResourceImporter} that only accepts resources with an expected
+     * name. The returned state depends on the prefix of the resource name:
+     * "ignore" results in {@link ResourceState#IGNORED}, "error" in an
+     * {@link IOException} and everything else in {@link ResourceState#LOADED}.
+     */
+    public static class DummyResourceImporter implements ResourceImporter {
+
+        /**
+         * The names (without path) of the resources that are expected but
+         * were not yet imported
+         */
+        Collection<String> expectedNames;
+        /**
+         * @param expectedResource the paths or names of the expected
+         * resources. Only the file name is used, therefore the names MUST BE
+         * unique.
+         */
+        DummyResourceImporter(Collection<String> expectedResource){
+            this.expectedNames = new HashSet<String>();
+            for(String resource : expectedResource){
+                //this works only if there are not two files with the same name
+                //so add an assertion to check for that
+                String name = FilenameUtils.getName(resource);
+                assertFalse("This Test requires that there are no files with the same name!",
+                    expectedNames.contains(name));
+                this.expectedNames.add(name);
+            }
+        }
+        @Override
+        public ResourceState importResource(InputStream is, String resourceName) throws IOException {
+            assertNotNull(is);
+            assertNotNull(resourceName);
+            assertFalse(resourceName.isEmpty());
+            //removing the name also ensures that no resource is imported twice
+            assertTrue("resourceName '"+resourceName+"' not expected",
+                expectedNames.remove(resourceName));
+            IOUtils.closeQuietly(is);
+            log.debug("Import Resource {}",resourceName);
+            if(resourceName.startsWith("ignore")){
+                return ResourceState.IGNORED;
+            } else if(resourceName.startsWith("error")){
+                throw new IOException("To test an Error");
+            } else {
+                return ResourceState.LOADED;
+            }
+        }
+        /**
+         * Asserts that all expected resources were sent to
+         * {@link #importResource(InputStream, String)}
+         */
+        public void checkAllProcessed(){
+            assertTrue(expectedNames.isEmpty());
+        }
+        
+    }
+    
+    /**
+     * Determines the directory holding the test resources (based on the
+     * "basedir" system property with "user.dir" as fallback).
+     */
+    @BeforeClass
+    public static void init(){
+        String baseDir = System.getProperty("basedir");
+        if(baseDir == null){
+            baseDir = System.getProperty("user.dir");
+        }
+        rootDir = baseDir+TEST_CONFIGS_ROOT;
+    }
+
+    @Test
+    public void testSingleFile(){
+        DummyResourceImporter importer = new DummyResourceImporter(
+            Arrays.asList(rootDir+"singleFileTest.txt"));
+        ResourceLoader loader = new ResourceLoader(importer, false, 
+            new File(rootDir,"singleFileTest.txt"));
+        //before loading the file is expected to be REGISTERED
+        assertEquals(new HashSet<String>(Arrays.asList(rootDir+"singleFileTest.txt")), 
+            loader.getResources(ResourceState.REGISTERED));
+        assertTrue(loader.getResources(ResourceState.ERROR).isEmpty());
+        assertTrue(loader.getResources(ResourceState.LOADED).isEmpty());
+        assertTrue(loader.getResources(ResourceState.IGNORED).isEmpty());
+        loader.loadResources();
+        //after loading the file is expected to be LOADED
+        assertEquals(new HashSet<String>(Arrays.asList(rootDir+"singleFileTest.txt")),
+            loader.getResources(ResourceState.LOADED));
+        assertTrue(loader.getResources(ResourceState.REGISTERED).isEmpty());
+        assertTrue(loader.getResources(ResourceState.IGNORED).isEmpty());
+        assertTrue(loader.getResources(ResourceState.ERROR).isEmpty());
+        importer.checkAllProcessed();
+        
+    }
+    @Test
+    public void testFolderWithoutProcessingArchives(){
+        String folder = rootDir+TEST_FOLDER_NAME;
+        Collection<String> expectedFolderResources = new HashSet<String>(Arrays.asList(
+            folder+"archiveInFolder.zip",
+            folder+"archiveWithIgnore.zip",
+            folder+"archiveWithError.zip",
+            folder+"errorFileInFolder.txt",
+            folder+"fileInFolder.txt",
+            folder+"ignoreFileInFolder.txt",
+            folder+"otherFileInFolder.txt"));
+        DummyResourceImporter importer = new DummyResourceImporter(
+            expectedFolderResources);
+        ResourceLoader loader = new ResourceLoader(importer, false, 
+            new File(rootDir,"testFolder"));
+        assertEquals(expectedFolderResources, loader.getResources(ResourceState.REGISTERED));
+        assertTrue(loader.getResources(ResourceState.ERROR).isEmpty());
+        assertTrue(loader.getResources(ResourceState.LOADED).isEmpty());
+        assertTrue(loader.getResources(ResourceState.IGNORED).isEmpty());
+        loader.loadResources();
+        //archives are not processed and are therefore expected to be
+        //loaded like any other file
+        assertEquals(new HashSet<String>(Arrays.asList(
+            folder+"archiveInFolder.zip", folder+"fileInFolder.txt",
+            folder+"otherFileInFolder.txt",folder+"archiveWithIgnore.zip",
+            folder+"archiveWithError.zip")), 
+            loader.getResources(ResourceState.LOADED));
+        assertTrue(loader.getResources(ResourceState.REGISTERED).isEmpty());
+        assertEquals(new HashSet<String>(Arrays.asList(
+            folder+"errorFileInFolder.txt")), 
+            loader.getResources(ResourceState.ERROR));
+        assertEquals(new HashSet<String>(Arrays.asList(
+            folder+"ignoreFileInFolder.txt")), 
+            loader.getResources(ResourceState.IGNORED));
+        //same check as in testSingleFile(): every file of the folder needs to
+        //be sent to the importer
+        importer.checkAllProcessed();
+    }
+    @Test
+    public void testFolderWithProcessingArchives(){
+        String folder = rootDir+TEST_FOLDER_NAME;
+        Collection<String> expectedResources = new HashSet<String>(Arrays.asList(
+            folder+"archiveInFolder.zip",
+            folder+"archiveWithIgnore.zip",
+            folder+"archiveWithError.zip",
+            folder+"errorFileInFolder.txt",
+            folder+"fileInFolder.txt",
+            folder+"ignoreFileInFolder.txt",
+            folder+"otherFileInFolder.txt"));
+        //the resourceNames send to the importer are now different because the
+        //archives are processed and the entries are sent to the  ResourceImporter
+        Collection<String> expectedResourceNames = Arrays.asList(
+            "fileInArchive.txt", //part of archiveInFolder.zip
+            "otherFileInArchive.txt", //part of archiveInFolder.zip
+            "ignoreFileInArchive.txt", //part of archiveWithIgnore.zip
+            "errorFileInArchive.txt", //part of archiveWithError.zip
+            "errorFileInFolder.txt",
+            "fileInFolder.txt",
+            "ignoreFileInFolder.txt",
+            "otherFileInFolder.txt");
+        DummyResourceImporter importer = new DummyResourceImporter(
+            expectedResourceNames);
+        ResourceLoader loader = new ResourceLoader(importer, true, 
+            new File(rootDir,TEST_FOLDER_NAME));
+
+        assertEquals(expectedResources, loader.getResources(ResourceState.REGISTERED));
+        assertTrue(loader.getResources(ResourceState.ERROR).isEmpty());
+        assertTrue(loader.getResources(ResourceState.LOADED).isEmpty());
+        assertTrue(loader.getResources(ResourceState.IGNORED).isEmpty());
+        loader.loadResources();
+        assertEquals(new HashSet<String>(Arrays.asList(
+            folder+"archiveInFolder.zip", 
+            folder+"archiveWithIgnore.zip", //ignored files in archives are OK
+            folder+"fileInFolder.txt",folder+"otherFileInFolder.txt")), 
+            loader.getResources(ResourceState.LOADED));
+        assertTrue(loader.getResources(ResourceState.REGISTERED).isEmpty());
+        assertEquals(new HashSet<String>(Arrays.asList(
+            folder+"errorFileInFolder.txt",
+            folder+"archiveWithError.zip")), //archive with errors MUST be ERROR
+            loader.getResources(ResourceState.ERROR));
+        assertEquals(new HashSet<String>(Arrays.asList(
+            folder+"ignoreFileInFolder.txt")), 
+            loader.getResources(ResourceState.IGNORED));
+        //NOTE(review): importer.checkAllProcessed() is not called here. It
+        //could be added once it is confirmed that the ResourceLoader sends
+        //all entries of an archive to the importer (also after an error).
+    }
+}

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexerTestMappings.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexerTestMappings.txt?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexerTestMappings.txt (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexerTestMappings.txt Fri Apr 29 09:20:31 2011
@@ -0,0 +1,15 @@
+# --- Define the Languages for all fields ---
+| @=null;en;de;fr;it
+
+# --- RDF, RDFS and OWL Mappings ---
+rdf:*
+rdfs:*
+# convert rdf:type statements to References
+rdf:type | d=entityhub:ref
+
+# --- Dublin Core (dc terms and dc elements) ---
+dc:*
+# convert DC Elements to dc namespace
+dc-elements:title > dc:title
+dc-elements:created > dc:created
+dc-elements:creator > dc:creator
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexerTestMappings.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexing.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexing.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexing.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexing.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,27 @@
+name=test
+description=Indexer Test Configuration
+
+#used to iterate over the entity data
+entityDataIterable=org.apache.stanbol.entityhub.indexing.core.DummyEntityDataSource
+
+#used to provide entity data for a given id (not used for this test)
+#entityDataProvider=org.apache.stanbol.entityhub.indexing.core.DummyEntityDataSource
+
+#used to normalize scores
+scoreNormalizer=org.apache.stanbol.entityhub.indexing.core.normaliser.RangeNormaliser,config:range;org.apache.stanbol.entityhub.indexing.core.normaliser.NaturalLogNormaliser
+
+#used to iterate over Entities (not used by this test)
+#entityIdIterator=org.apache.stanbol.entityhub.indexing.core.LineBasedEntityIterator,source:testEntityScore.txt,charset:UTF-8,encodeIds:false
+
+#used to provide the score for Entities
+entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.EntityFieldScoreProvider
+
+#used to process indexed Entities
+#will use the mappings configured for "fieldConfiguration" if not otherwise specified
+entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor
+
+#used to store the configuration of the index within the yard
+fieldConfiguration=indexerTestMappings.txt
+
+#The destination responsible to store the indexed entities
+indexingDestination=org.apache.stanbol.entityhub.indexing.core.DummyIndexingDestination
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexing.properties
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/minIncomming.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/minIncomming.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/minIncomming.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/minIncomming.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,2 @@
+inclusive=true
+min-score=2
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/minIncomming.properties
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/range.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/range.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/range.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/range.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,4 @@
+#this value is normalized by all chained normalizers
+max-expected-score=10000
+#the upper bound of the resulting range [0..{upper-bound}]
+upper-bound=100
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/range.properties
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexerTestMappings.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexerTestMappings.txt?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexerTestMappings.txt (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexerTestMappings.txt Fri Apr 29 09:20:31 2011
@@ -0,0 +1,15 @@
+# --- Define the Languages for all fields ---
+| @=null;en;de;fr;it
+
+# --- RDF, RDFS and OWL Mappings ---
+rdf:*
+rdfs:*
+# convert rdf:type statements to References
+rdf:type | d=entityhub:ref
+
+# --- Dublin Core (dc terms and dc elements) ---
+dc:*
+# convert DC Elements to dc namespace
+dc-elements:title > dc:title
+dc-elements:created > dc:created
+dc-elements:creator > dc:creator
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexerTestMappings.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexing.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexing.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexing.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexing.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,27 @@
+name=test
+description=Indexer Test Configuration
+
+#used to iterate over the entity data (not used for this test)
+#entityDataIterable=org.apache.stanbol.entityhub.indexing.core.DummyEntityDataSource
+
+#used to provide entity data for a given id 
+entityDataProvider=org.apache.stanbol.entityhub.indexing.core.DummyEntityDataSource
+
+#used to normalize scores
+scoreNormalizer=org.apache.stanbol.entityhub.indexing.core.normaliser.RangeNormaliser,config:range;org.apache.stanbol.entityhub.indexing.core.normaliser.NaturalLogNormaliser
+
+#used to iterate over Entities
+entityIdIterator=org.apache.stanbol.entityhub.indexing.core.DummyEntityIdSource
+
+#used to provide the score for Entities
+#entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.EntityFieldScoreProvider
+
+#used to process indexed Entities
+#will use the mappings configured for "fieldConfiguration" if not otherwise specified
+entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor
+
+#used to store the configuration of the index within the yard
+fieldConfiguration=indexerTestMappings.txt
+
+#The destination responsible to store the indexed entities
+indexingDestination=org.apache.stanbol.entityhub.indexing.core.DummyIndexingDestination
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexing.properties
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/minIncomming.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/minIncomming.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/minIncomming.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/minIncomming.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,2 @@
+inclusive=true
+min-score=2
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/minIncomming.properties
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/range.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/range.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/range.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/range.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,4 @@
+#this value is normalized by all chained normalizers
+max-expected-score=10000
+#the upper bound of the resulting range [0..{upper-bound}]
+upper-bound=100
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/range.properties
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/singleFileTest.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/singleFileTest.txt?rev=1097740&view=auto
==============================================================================
    (empty)

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/singleFileTest.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveInFolder.zip
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveInFolder.zip?rev=1097740&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveInFolder.zip
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveWithError.zip
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveWithError.zip?rev=1097740&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveWithError.zip
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveWithIgnore.zip
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveWithIgnore.zip?rev=1097740&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveWithIgnore.zip
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/errorFileInFolder.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/errorFileInFolder.txt?rev=1097740&view=auto
==============================================================================
    (empty)

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/errorFileInFolder.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/fileInFolder.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/fileInFolder.txt?rev=1097740&view=auto
==============================================================================
    (empty)

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/fileInFolder.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/ignoreFileInFolder.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/ignoreFileInFolder.txt?rev=1097740&view=auto
==============================================================================
    (empty)

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/ignoreFileInFolder.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/otherFileInFolder.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/otherFileInFolder.txt?rev=1097740&view=auto
==============================================================================
    (empty)

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/otherFileInFolder.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/dbPediaMappings.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/dbPediaMappings.txt?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/dbPediaMappings.txt (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/dbPediaMappings.txt Fri Apr 29 09:20:31 2011
@@ -0,0 +1,61 @@
+# --- Define the Languages for all fields ---
+| @=null;en;de;fr;it
+
+# --- RDF, RDFS and OWL Mappings ---
+rdfs:label
+rdfs:comment
+rdf:type | d=entityhub:ref
+# used by LOD to link to URIs used to identify the same Entity
+owl:sameAs | d=entityhub:ref
+
+# --- Dublin Core (dc terms and dc elements) ---
+dc:*
+# all DC Elements (one could also define the mappings to the DC Terms counterparts here)
+dc-elements:*
+
+# --- Spatial Things ---
+geo:lat | d=xsd:double
+geo:long | d=xsd:double
+geo:alt | d=xsd:int;xsd:float
+# one can also copy the values from the DBpedia properties
+#dbp-prop:latitude | d=xsd:decimal > geo:lat
+#dbp-prop:longitude | d=xsd:decimal > geo:long
+
+# --- Thesaurus (via SKOS) ---
+#SKOS can be used to define hierarchical terminologies
+skos:*
+skos:broader | d=entityhub:ref
+skos:narrower | d=entityhub:ref
+skos:related | d=entityhub:ref
+skos:member | d=entityhub:ref
+skos:subject | d=entityhub:ref
+skos:inScheme | d=entityhub:ref
+skos:hasTopConcept | d=entityhub:ref
+skos:topConceptOf | d=entityhub:ref
+
+# --- Social Networks (via foaf) ---
+#The Friend of a Friend schema often used to describe social relations between people
+foaf:* 
+foaf:knows | d=entityhub:ref
+foaf:made | d=entityhub:ref
+foaf:maker | d=entityhub:ref
+foaf:member | d=entityhub:ref
+foaf:homepage | d=xsd:anyURI
+# also use the DBpedia property website for foaf:homepage!
+dbp-prop:website | d=xsd:anyURI > foaf:homepage
+foaf:depiction | d=xsd:anyURI
+# also use the DBpedia thumbnail as foaf:depiction
+dbp-ont:thumbnail | d=xsd:anyURI > foaf:depiction
+foaf:img | d=xsd:anyURI
+foaf:logo | d=xsd:anyURI
+# Documents about the entity
+foaf:page | d=xsd:anyURI
+
+# --- dbpedia specific
+# the "dbp-ont" defines knowledge mapped to the DBPedia ontology
+dbp-ont:*
+# the "DBpedia properties" are all key-value pairs extracted from the info boxes
+# on the right hand side of Wikipedia pages.
+#dbp-prop:*
+# Copy only population for now (one could add additional if necessary)!
+dbp-prop:population | d=xsd:integer
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/dbPediaMappings.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexFieldConfig.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexFieldConfig.txt?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexFieldConfig.txt (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexFieldConfig.txt Fri Apr 29 09:20:31 2011
@@ -0,0 +1,28 @@
+#This is a similar version of the dbPediaMappings.txt but excludes mapping rules
+# --- Define the Languages for all fields ---
+| @=null;en;de;fr;it
+
+# --- RDF, RDFS and OWL Mappings ---
+rdfs:label
+rdfs:comment
+rdf:type
+# used by LOD to link to URIs used to identify the same Entity
+owl:sameAs
+
+# --- Other Namespaces to include---
+dc:*
+dc-elements:*
+geo:*
+skos:*
+foaf:* 
+
+# --- dbpedia specific
+# the "dbp-ont" defines knowledge mapped to the DBPedia ontology
+dbp-ont:*
+
+# the "DBpedia properties" are all key-value pairs extracted from the info boxes
+# on the right hand side of Wikipedia pages.
+# uncomment the next line to include all
+#dbp-prop:*
+# Currently only the population is added
+dbp-prop:population
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexFieldConfig.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexing.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexing.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexing.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexing.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,9 @@
+name=simple
+description=Simple Configuration
+
+scoreNormalizer=org.apache.stanbol.entityhub.indexing.core.normaliser.RangeNormaliser,config:range;org.apache.stanbol.entityhub.indexing.core.normaliser.NaturalLogNormaliser;org.apache.stanbol.entityhub.indexing.core.normaliser.MinScoreNormalizer,config:minIncomming
+
+entityIdIterator=org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator,source:testEntityScore.txt,charset:UTF-8,encodeIds:false
+
+entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor,mappings:dbPediaMappings.txt
+fieldConfiguration=indexFieldConfig.txt
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexing.properties
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/minIncomming.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/minIncomming.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/minIncomming.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/minIncomming.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,2 @@
+inclusive=true
+min-score=2
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/minIncomming.properties
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/range.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/range.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/range.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/range.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1 @@
+upper-bound=10
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/range.properties
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/resources/testEntityScore.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/resources/testEntityScore.txt?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/resources/testEntityScore.txt (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/resources/testEntityScore.txt Fri Apr 29 09:20:31 2011
@@ -0,0 +1,3 @@
+http://www.example.org/entity/test	100
+http://www.example.org/entity/test2	10
+http://www.example.org/entity/test3	1
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/resources/testEntityScore.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/stanbol/trunk/entityhub/indexing/dblp/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/dblp/pom.xml?rev=1097740&r1=1097739&r2=1097740&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/dblp/pom.xml (original)
+++ incubator/stanbol/trunk/entityhub/indexing/dblp/pom.xml Fri Apr 29 09:20:31 2011
@@ -19,14 +19,17 @@
          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 
   <modelVersion>4.0.0</modelVersion>
+<!-- Do not use a parent because of problems with missing dependencies with
+     mvn assembly:assembly
   <parent>
     <groupId>org.apache.stanbol</groupId>
     <artifactId>org.apache.stanbol.entityhub.parent</artifactId>
     <version>0.9-SNAPSHOT</version>
     <relativePath>../../parent</relativePath>
-  </parent>
+  </parent>  -->
   <groupId>org.apache.stanbol</groupId>
   <artifactId>org.apache.stanbol.entityhub.indexing.dblp</artifactId>
+  <version>0.9-SNAPSHOT</version>
   <packaging>jar</packaging>
   <name>Apache Stanbol Entityhub Indexing for dblp</name>
   <description>This uses the RDF dump provided by

Modified: incubator/stanbol/trunk/entityhub/indexing/dbpedia/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/dbpedia/pom.xml?rev=1097740&r1=1097739&r2=1097740&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/dbpedia/pom.xml (original)
+++ incubator/stanbol/trunk/entityhub/indexing/dbpedia/pom.xml Fri Apr 29 09:20:31 2011
@@ -19,7 +19,9 @@
          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 
   <modelVersion>4.0.0</modelVersion>
-<!-- <parent>
+<!-- Do not use a parent because of problems with missing dependencies with
+     mvn assembly:assembly
+  <parent>
     <groupId>org.apache.stanbol</groupId>
     <artifactId>org.apache.stanbol.entityhub.parent</artifactId>
     <version>0.9-SNAPSHOT</version>