You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2011/04/29 11:20:38 UTC
svn commit: r1097740 [4/10] - in /incubator/stanbol/trunk: entityhub/
entityhub/generic/core/src/main/java/org/apache/stanbol/entityhub/core/mapping/
entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/mapping/
entityhu...
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/MapEntityScoreProvider.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/MapEntityScoreProvider.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/MapEntityScoreProvider.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/MapEntityScoreProvider.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,71 @@
+package org.apache.stanbol.entityhub.indexing.core.source;
+
+import java.util.Map;
+
+import org.apache.stanbol.entityhub.indexing.core.EntityScoreProvider;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+
+/**
+ * Implementation of the {@link EntityScoreProvider} interface based on a
+ * {@link Map} that holds the score for each entity id. Scores are looked up
+ * by id via {@link #process(String)}; the data of the entities are not needed
+ * and {@link #process(Representation)} is not supported.
+ * @author Rupert Westenthaler
+ */
+public class MapEntityScoreProvider implements EntityScoreProvider {
+ /**
+ * The map with the rankings (entity id to score). Set to <code>null</code>
+ * by {@link #close()}.
+ */
+ private Map<String,Float> rankings;
+ /**
+ * Ranking based entity Evaluator.<p>
+ * Note that Entities with rankings of <code>null</code> or
+ * <code>&lt; 0</code> will not be indexed. This class does not filter such
+ * entities itself; it only returns the value stored in the map
+ * (NOTE(review): the filtering is presumably done by the user of this
+ * provider - confirm).
+ * @param rankings the map holding the rankings. The parsed instance is used
+ * directly (it is not copied).
+ * @throws IllegalArgumentException if the ranking map is <code>null</code>
+ * or empty.
+ */
+ public MapEntityScoreProvider(Map<String,Float> rankings) throws IllegalArgumentException{
+ if(rankings == null || rankings.isEmpty()){
+ throw new IllegalArgumentException("The map with the rankings MUST NOT be NULL or empty");
+ }
+ this.rankings = rankings;
+ }
+ /**
+ * Not supported: the rankings are parsed to the constructor.
+ * @throws UnsupportedOperationException always
+ */
+ @Override
+ public void setConfiguration(Map<String,Object> config) {
+ throw new UnsupportedOperationException("Map based configuration is not supported by this implementation!");
+ }
+ /**
+ * Returns <code>false</code> because the rankings are already available
+ * after construction.
+ */
+ @Override
+ public boolean needsInitialisation() {
+ return false;
+ }
+ @Override
+ public void initialise() {
+ // nothing to do
+ }
+ /**
+ * Releases the reference to the rankings map. After this call
+ * {@link #process(String)} will fail with a {@link NullPointerException}.
+ */
+ @Override
+ public void close() {
+ //do not remove the elements because the map might be also used by others
+ this.rankings = null;
+ }
+ /**
+ * Returns <code>false</code> because this implementation does not need the
+ * data of the Entities
+ * @see EntityScoreProvider#needsData()
+ */
+ @Override
+ public boolean needsData() {
+ return false;
+ }
+
+ /**
+ * Looks up the score for the parsed entity id.
+ * @param id the id of the entity
+ * @return the value stored in the rankings map for the id or
+ * <code>null</code> if the map has no (or a <code>null</code>) value for it
+ */
+ @Override
+ public Float process(String id) {
+ return rankings.get(id);
+ }
+
+ /**
+ * Not supported, because scores are looked up by id only.
+ * @throws UnsupportedOperationException always
+ */
+ @Override
+ public Float process(Representation entity) throws UnsupportedOperationException {
+ throw new UnsupportedOperationException("This Class uses process(String id) for evaluation");
+ }
+
+}
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/MapEntityScoreProvider.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceImporter.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceImporter.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceImporter.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceImporter.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,21 @@
+package org.apache.stanbol.entityhub.indexing.core.source;
+
+import java.io.IOException;
+import java.io.InputStream;
+/**
+ * The processor used by the {@link ResourceLoader} to load registered
+ * resources. The loader calls {@link #importResource(InputStream, String)}
+ * once for each file (or for each entry of a ZIP/JAR archive).
+ * @author Rupert Westenthaler
+ *
+ */
+public interface ResourceImporter {
+ /**
+ * Processes a resource and returns the new state for that resource
+ * @param is the stream to read the resource from
+ * @param resourceName the name of the resource (the file or archive entry
+ * name without the path)
+ * @return the State of the resource after the processing
+ * @throws IOException On any error while reading the resource. Throwing
+ * an IOException will set the state of the resource to
+ * {@link ResourceState#ERROR}
+ */
+ ResourceState importResource(InputStream is,String resourceName) throws IOException;
+}
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceImporter.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,255 @@
+package org.apache.stanbol.entityhub.indexing.core.source;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.Enumeration;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.Map.Entry;
+
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipFile;
+import org.apache.commons.io.FilenameUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Keeps track of file resources together with their {@link ResourceState}
+ * and imports all {@link ResourceState#REGISTERED registered} resources by
+ * parsing them to the configured {@link ResourceImporter}. Entries of ZIP/JAR
+ * archives can optionally be imported one by one.
+ */
+public class ResourceLoader {
+
+    private static final Logger log = LoggerFactory.getLogger(ResourceLoader.class);
+    private final ResourceImporter resourceImporter;
+    /**
+     * The registered files (path to state). All modifications are
+     * synchronised on this map.
+     */
+    private final Map<String,ResourceState> files;
+    /**
+     * for future uses to activate/deactivate parsing of entries within ZIP
+     * archives. If <code>false</code> the ZIP archive will be parsed as a
+     * whole. If <code>true</code> the Entries of the ZIP archive will be
+     * parsed to the resource handler.
+     */
+    private boolean loadEntriesWithinZipArchives = true;
+    public ResourceLoader(ResourceImporter resourceImporter) {
+        this(resourceImporter,true,null);
+    }
+    public ResourceLoader(ResourceImporter resourceImporter, boolean processEntriesWithinArchives) {
+        this(resourceImporter,processEntriesWithinArchives,null);
+    }
+    /**
+     * Creates a resource loader and registers the parsed file or directory.
+     * @param resourceImporter the importer used to process the resources
+     * @param processEntriesWithinArchives if <code>true</code> the entries of
+     * ZIP/JAR archives are imported one by one; otherwise archives are parsed
+     * as a whole
+     * @param fileOrDirectory the initial resource(s) or <code>null</code> if none
+     * @throws IllegalStateException if the parsed importer is <code>null</code>
+     */
+    public ResourceLoader(ResourceImporter resourceImporter, boolean processEntriesWithinArchives,File fileOrDirectory) {
+        if(resourceImporter == null){
+            throw new IllegalStateException("The parsed ResourceProcessor instance MUST NOT be NULL!");
+        }
+        this.resourceImporter = resourceImporter;
+        this.loadEntriesWithinZipArchives = processEntriesWithinArchives;
+        //use a tree map to have the files sorted
+        this.files = new TreeMap<String,ResourceState>();
+        addResource(fileOrDirectory);
+    }
+
+    /**
+     * Adds a new {@link File} resource to this resource loader. In case a
+     * directory is parsed, all files directly within this directory will be
+     * also added. Note that hidden Files are ignored.<p>
+     * Files that are already known with the state {@link ResourceState#ERROR}
+     * are registered again, so that the next call to {@link #loadResources()}
+     * retries to load them. Files in any other state are not changed.
+     * @param fileOrDirectory the file/directory to add.
+     */
+    public void addResource(File fileOrDirectory){
+        if(fileOrDirectory == null){
+            return;
+        }
+        //collect the files before acquiring the lock (file system access)
+        Collection<String> toAdd = getFiles(fileOrDirectory);
+        synchronized (files) {
+            for(String file : toAdd){
+                ResourceState state = files.get(file);
+                if(state == null){
+                    log.debug("File {} registered to this RdfLoader",file);
+                    files.put(file, ResourceState.REGISTERED);
+                } else if(state == ResourceState.ERROR){
+                    log.info("Readding file {} after previous error while loading",file);
+                    //reset the state, otherwise the file would never be loaded again
+                    files.put(file, ResourceState.REGISTERED);
+                } else {
+                    log.info("Ignore file {} because it already present with state {}",file,state);
+                }
+            }
+        }
+    }
+    /**
+     * Getter for the read only status of the resource loader.
+     * @return the read only view of the status
+     */
+    public Map<String,ResourceState> getResourceStates(){
+        return Collections.unmodifiableMap(files);
+    }
+    /**
+     * Getter for all resources that are currently in the parsed state.
+     * This Method returns a copy of all resources in the parsed state.
+     * @param state the processing state
+     * @return A copy of all resources in the parsed state
+     */
+    public Collection<String> getResources(ResourceState state){
+        if(state == null){
+            return Collections.emptySet();
+        } else {
+            return getResources(EnumSet.of(state));
+        }
+    }
+    /**
+     * Getter for all resources that are currently in one of the parsed states.
+     * This Method returns a copy of all resources in such states.
+     * @param states the processing states
+     * @return A copy of all resources in one of the parsed states
+     */
+    public Collection<String> getResources(Set<ResourceState> states){
+        if(states == null){
+            return Collections.emptySet();
+        }
+        Collection<String> matching = new HashSet<String>();
+        synchronized (this.files) {
+            for(Entry<String,ResourceState> entry : this.files.entrySet()){
+                if(states.contains(entry.getValue())){
+                    matching.add(entry.getKey());
+                }
+            }
+        }
+        return matching;
+    }
+    /**
+     * Loads all resources that are in the state
+     * {@link ResourceState#REGISTERED}. The lookup is repeated until no
+     * registered resource is left, to support adding new files while loading.
+     */
+    public void loadResources(){
+        Collection<String> fileToLoad;
+        do { //to support adding of new files while loading
+            fileToLoad = getResources(ResourceState.REGISTERED);
+            long start=System.currentTimeMillis();
+            log.info("Loding RDF {} File{} ...",fileToLoad.size(),fileToLoad.size()>1?"s":"");
+            for (String file : fileToLoad) {
+                loadResource(file);
+            }
+            log.info(" ... {} files imported in {} seconds",
+                fileToLoad.size(),(System.currentTimeMillis()-start)/1000);
+        } while(!fileToLoad.isEmpty());
+    }
+    /**
+     * Loads a resource from a file. The file is only loaded if it is in the
+     * state {@link ResourceState#REGISTERED}.
+     * @param file the file resource
+     */
+    private void loadResource(String file) {
+        synchronized (files) {
+            //sync to files to avoid two threads loading the same file
+            ResourceState state = files.get(file);
+            if(state != ResourceState.REGISTERED){ //also true for null
+                log.info("Do not load File {} because of its state {} (null means removed from list)",
+                    file,state);
+                return; //someone removed it in between
+            }
+            //set to loading
+            setResourceState(file, ResourceState.LOADING, null);
+        }
+        long startFile = System.currentTimeMillis();
+        log.info(" > loading '{}' ...", file);
+        String extension = FilenameUtils.getExtension(file);
+        boolean isArchive = "zip".equalsIgnoreCase(extension) ||
+                "jar".equalsIgnoreCase(extension);
+        if(loadEntriesWithinZipArchives && isArchive){
+            log.info(" - processing {}-archive entries:",extension);
+            loadArchiveEntries(file);
+        } else {
+            loadFile(file);
+        }
+        log.info(" - completed in {} seconds",
+            (System.currentTimeMillis()-startFile)/1000);
+    }
+    /**
+     * Imports all (non directory) entries of a ZIP/JAR archive and sets the
+     * state of the archive as a whole: {@link ResourceState#ERROR} if the
+     * archive can not be opened or any entry fails,
+     * {@link ResourceState#LOADED} otherwise.
+     * @param file the path of the archive
+     */
+    private void loadArchiveEntries(String file) {
+        ZipFile zipArchive;
+        try {
+            zipArchive = new ZipFile(file);
+        } catch (IOException e) {
+            setResourceState(file, ResourceState.ERROR,e);
+            return;
+        }
+        boolean isError = false;
+        try {
+            Enumeration<ZipArchiveEntry> entries = zipArchive.getEntries();
+            while(entries.hasMoreElements()){
+                ZipArchiveEntry entry = entries.nextElement();
+                if(entry.isDirectory()){
+                    continue;
+                }
+                String entryName = entry.getName();
+                log.info(" o loading entry '{}'", entryName);
+                InputStream is = null;
+                try {
+                    is = zipArchive.getInputStream(entry);
+                    ResourceState state = resourceImporter.importResource(
+                        is, FilenameUtils.getName(entryName));
+                    if(state == ResourceState.ERROR){
+                        isError = true;
+                    }
+                } catch (IOException e) {
+                    //do not silently swallow the reason for the error state
+                    log.error("Exception while loading entry "+entryName
+                        +" of archive "+file,e);
+                    isError = true;
+                } finally {
+                    closeQuietly(is);
+                }
+            }
+        } finally {
+            //release the file handle of the archive
+            try {
+                zipArchive.close();
+            } catch (IOException e) {
+                log.debug("Unable to close archive "+file,e);
+            }
+        }
+        //set the state for the Archive as a whole
+        setResourceState(file,
+            isError ? ResourceState.ERROR : ResourceState.LOADED, null);
+    }
+    /**
+     * Imports a single file as a whole and sets its state to the one returned
+     * by the {@link ResourceImporter} or to {@link ResourceState#ERROR} if
+     * the file can not be read.
+     * @param file the path of the file
+     */
+    private void loadFile(String file) {
+        InputStream is = null;
+        try {
+            is = new FileInputStream(file);
+            ResourceState state = resourceImporter.importResource(is,
+                FilenameUtils.getName(file));
+            setResourceState(file, state, null);
+        } catch (FileNotFoundException e) {
+            //during init it is checked that files exists and are files
+            //and there is read access so this can only happen if
+            //someone deletes the file in between
+            setResourceState(file, ResourceState.ERROR, e);
+        } catch (IOException e) {
+            setResourceState(file, ResourceState.ERROR, e);
+        } finally {
+            closeQuietly(is);
+        }
+    }
+    /**
+     * Closes the parsed stream and ignores <code>null</code> as well as any
+     * {@link IOException} thrown while closing. Closing a stream that was
+     * already closed by the importer has no effect.
+     * @param is the stream to close or <code>null</code>
+     */
+    private static void closeQuietly(InputStream is) {
+        if(is != null){
+            try {
+                is.close();
+            } catch (IOException e) {
+                log.debug("Unable to close stream",e);
+            }
+        }
+    }
+    /**
+     * Getter for the files based on a parsed File or Directory. Hidden Files
+     * are ignored. Does not search recursively to the directory structure!
+     * @param fileOrDir The file or directory
+     * @return the Collection of files found based on the parameter. An empty
+     * collection if the parameter is <code>null</code>, hidden, does not
+     * exist or if the directory can not be listed.
+     */
+    private static Collection<String> getFiles(File fileOrDir){
+        if(fileOrDir == null || fileOrDir.isHidden()){
+            return Collections.emptySet();
+        }
+        if(fileOrDir.isFile()){
+            return Collections.singleton(fileOrDir.getPath());
+        }
+        if(!fileOrDir.isDirectory()){ //file does not exist
+            return Collections.emptySet();
+        }
+        File[] children = fileOrDir.listFiles();
+        if(children == null){ //I/O error while listing the directory
+            log.warn("Unable to list the files of directory {}",fileOrDir);
+            return Collections.emptySet();
+        }
+        Collection<String> found = new ArrayList<String>(children.length);
+        for(File file : children){
+            if(file.isFile() && !file.isHidden()){
+                //File#getPath() of a listed file already includes the path of
+                //the directory. So only the name must be appended, otherwise
+                //the directory would be duplicated for relative paths.
+                String path = FilenameUtils.concat(fileOrDir.getPath(), file.getName());
+                //concat returns null if the path can not be normalised
+                found.add(path != null ? path : file.getPath());
+            }
+        }
+        return found;
+    }
+    /**
+     * Sets the state of a file that is registered with this loader. If an
+     * Exception is parsed it is logged. Calls for files that are not (or no
+     * longer) registered are ignored.
+     * @param file the affected file. Calls with <code>null</code> are ignored
+     * @param state the new state of the file
+     * @param e the Exception to log or <code>null</code> if none
+     * @throws IllegalArgumentException if the parsed state is <code>null</code>
+     */
+    private void setResourceState(String file, ResourceState state,Exception e) {
+        if(e != null){
+            log.error("Exception while loading file "+file,e);
+        }
+        if(state == null){
+            //ensure that there are no null values in the map
+            throw new IllegalArgumentException("The parsed ProcessingState MUST NOT be NULL!");
+        }
+        if(file == null){
+            //ignore calls if file is null
+            return;
+        }
+        synchronized (files) {
+            if(files.containsKey(file)){
+                log.debug("File {} now in state {}",file,state);
+                files.put(file, state);
+            } else {
+                log.info("Ignore Error for File {} because it is no longer registered with this RdfLoader",
+                    file);
+            }
+        }
+    }
+}
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceState.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceState.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceState.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceState.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,32 @@
+/**
+ *
+ */
+package org.apache.stanbol.entityhub.indexing.core.source;
+
+/**
+ * State of resources managed by the {@link ResourceLoader}
+ * @author Rupert Westenthaler
+ *
+ */
+public enum ResourceState {
+ /**
+ * Resources that are registered but not yet processed
+ */
+ REGISTERED,
+ /**
+ * Resources that are currently processed
+ */
+ LOADING,
+ /**
+ * Resources that were successfully loaded
+ */
+ LOADED,
+ /**
+ * Resources that were ignored
+ */
+ IGNORED,
+ /**
+ * Indicates an Error while processing a resource
+ */
+ ERROR
+}
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceState.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,84 @@
+package org.apache.stanbol.entityhub.indexing.core;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
+import org.apache.stanbol.entityhub.indexing.core.normaliser.MinScoreNormalizer;
+import org.apache.stanbol.entityhub.indexing.core.normaliser.NaturalLogNormaliser;
+import org.apache.stanbol.entityhub.indexing.core.normaliser.RangeNormaliser;
+import org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser;
+import org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator;
+
+import static org.junit.Assert.*;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Tests loading of the {@link IndexingConfig} from the test configurations
+ * located in the test resources.
+ */
+public class ConfigTest {
+    private static final Logger log = LoggerFactory.getLogger(ConfigTest.class);
+    /**
+     * mvn copies the resources in "src/test/resources" to target/test-classes
+     */
+    private static final String TEST_CONFIGS_ROOT = "/target/test-classes/testConfigs/";
+    private static String testRoot;
+    /**
+     * Determines the root folder of the test configurations based on the
+     * "basedir" system property (fallback: "user.dir").
+     */
+    @BeforeClass
+    public static void initTestRootFolder(){
+        String baseDir = System.getProperty("basedir");
+        if(baseDir == null){
+            baseDir = System.getProperty("user.dir");
+        }
+        testRoot = baseDir+TEST_CONFIGS_ROOT;
+        log.info("ConfigTest Root ={}",testRoot);
+    }
+    @Test(expected=IllegalArgumentException.class)
+    public void missingRoot(){
+        new IndexingConfig(); //there is no indexing folder in the user.dir
+    }
+    @Test(expected=IllegalArgumentException.class)
+    public void missingConfigDir(){
+        new IndexingConfig(testRoot+"missingconfig");
+    }
+    /**
+     * Loads the "simple" configuration and checks the name, description, the
+     * chain of normalisers, the entity id iterator (including the normalised
+     * scores) and the entity processor.
+     */
+    @Test
+    public void loadSimpleConfigDir(){
+        IndexingConfig config = new IndexingConfig(testRoot+"simple");
+        //test the name (NOTE: JUnit expects the expected value as first parameter)
+        assertEquals("simple", config.getName());
+        assertEquals("Simple Configuration", config.getDescription());
+        //test if the normaliser configuration was parsed correctly!
+        final ScoreNormaliser normaliser = config.getNormaliser();
+        ScoreNormaliser testNormaliser = normaliser;
+        assertNotNull(testNormaliser);
+        assertEquals(RangeNormaliser.class, testNormaliser.getClass());
+        testNormaliser = testNormaliser.getChained();
+        assertNotNull(testNormaliser);
+        assertEquals(NaturalLogNormaliser.class, testNormaliser.getClass());
+        testNormaliser = testNormaliser.getChained();
+        assertNotNull(testNormaliser);
+        assertEquals(MinScoreNormalizer.class, testNormaliser.getClass());
+        EntityIterator entityIterator = config.getEntityIdIterator();
+        assertNotNull(entityIterator);
+        assertEquals(LineBasedEntityIterator.class, entityIterator.getClass());
+        Map<String,Float> entityIds = new HashMap<String,Float>();
+        //the values test if the normaliser configuration was read correctly
+        //the keys if the configured entityScore file was configured correctly
+        float boost = 10f/(float)Math.log1p(100);
+        entityIds.put("http://www.example.org/entity/test", Float.valueOf(10));
+        entityIds.put("http://www.example.org/entity/test2", Float.valueOf((float)(Math.log1p(10)*boost)));
+        entityIds.put("http://www.example.org/entity/test3", Float.valueOf(-1));
+        while(entityIterator.hasNext()){
+            EntityIterator.EntityScore entityScore = entityIterator.next();
+            Float expectedScore = entityIds.remove(entityScore.id);
+            assertNotNull("Entity with ID "+entityScore.id+" not found!",expectedScore);
+            Float score = normaliser.normalise(entityScore.score);
+            //fail with a message instead of a NullPointerException
+            assertNotNull("Normalised score for Entity "+entityScore.id+" is null",score);
+            assertTrue("Entity score "+score+" is not the expected "+expectedScore,expectedScore.compareTo(score)==0);
+        }
+        //all expected entities need to be returned by the iterator
+        assertTrue(entityIds.isEmpty());
+        EntityProcessor processor = config.getEntityProcessor();
+        assertNotNull(processor);
+    }
+
+
+}
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityDataSource.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityDataSource.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityDataSource.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityDataSource.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,67 @@
+/**
+ *
+ */
+package org.apache.stanbol.entityhub.indexing.core;
+
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+
+/**
+ * Dummy implementation of an {@link EntityDataIterable} and {@link EntityDataProvider}
+ * that reads the entity data directly form {@link IndexerTest#testData}
+ * @author Rupert Westenthaler
+ *
+ */
+public class DummyEntityDataSource implements EntityDataIterable, EntityDataProvider {
+
+    /**
+     * Creates an iterator over all representations currently contained in
+     * {@link IndexerTest#testData}.
+     */
+    @Override
+    public EntityDataIterator entityDataIterator() {
+        final Iterator<Representation> source = IndexerTest.testData.values().iterator();
+        return new EntityDataIterator() {
+            /** the representation of the entity returned by the last call to next() */
+            private Representation lastReturned = null;
+            @Override
+            public boolean hasNext() {
+                return source.hasNext();
+            }
+            @Override
+            public String next() {
+                lastReturned = source.next();
+                return lastReturned.getId();
+            }
+            @Override
+            public Representation getRepresentation() {
+                return lastReturned;
+            }
+            @Override
+            public void remove() {
+                throw new UnsupportedOperationException();
+            }
+            @Override
+            public void close() {
+                // nothing to release
+            }
+        };
+    }
+
+    /**
+     * Looks up the data of the entity with the parsed id in
+     * {@link IndexerTest#testData}.
+     */
+    @Override
+    public Representation getEntityData(String id) {
+        return IndexerTest.testData.get(id);
+    }
+
+    @Override
+    public void setConfiguration(Map<String,Object> config) {
+        // no configuration needed
+    }
+
+    @Override
+    public boolean needsInitialisation() {
+        return false;
+    }
+
+    @Override
+    public void initialise() {
+        // nothing to do
+    }
+
+    @Override
+    public void close() {
+        // nothing to do
+    }
+}
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityDataSource.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityIdSource.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityIdSource.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityIdSource.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityIdSource.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,53 @@
+/**
+ *
+ */
+package org.apache.stanbol.entityhub.indexing.core;
+
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+
+/**
+ * Dummy implementation of an {@link EntityIterator} that reads entity ids
+ * directly form {@link IndexerTest#testData}
+ * @author Rupert Westenthaler
+ *
+ */
+public class DummyEntityIdSource implements EntityIterator {
+ private Iterator<Representation> entiyIterator = IndexerTest.testData.values().iterator();
+ @Override
+ public void close() {
+ }
+
+ @Override
+ public void initialise() {
+ }
+
+ @Override
+ public boolean needsInitialisation() {
+ return false;
+ }
+
+ @Override
+ public void setConfiguration(Map<String,Object> config) {
+ }
+
+ @Override
+ public boolean hasNext() {
+ return entiyIterator.hasNext();
+ }
+
+ @Override
+ public EntityScore next() {
+ Representation next = entiyIterator.next();
+ Number score = next.getFirst(RdfResourceEnum.signRank.getUri(), Number.class);
+ return new EntityScore(next.getId(), score == null?0:score.floatValue());
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+}
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityIdSource.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityScoreSource.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityScoreSource.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityScoreSource.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityScoreSource.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,52 @@
+/**
+ *
+ */
+package org.apache.stanbol.entityhub.indexing.core;
+
+import java.util.Map;
+
+import org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+
+/**
+ * Dummy implementation of an {@link EntityScoreProvider} that creates
+ * {@link EntityScore} instances directly based on the test data stored in
+ * {@link IndexerTest#testData}
+ * @author Rupert Westenthaler
+ *
+ */
+public class DummyEntityScoreSource implements EntityScoreProvider {
+
+ @Override
+ public boolean needsData() {
+ return true;
+ }
+
+ @Override
+ public Float process(String id) throws UnsupportedOperationException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Float process(Representation entity) throws UnsupportedOperationException {
+ return entity.getFirst(RdfResourceEnum.signRank.getUri(), Float.class);
+ }
+
+ @Override
+ public void close() {
+ }
+
+ @Override
+ public void initialise() {
+ }
+
+ @Override
+ public boolean needsInitialisation() {
+ return false;
+ }
+
+ @Override
+ public void setConfiguration(Map<String,Object> config) {
+ }
+}
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyEntityScoreSource.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyIndexingDestination.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyIndexingDestination.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyIndexingDestination.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyIndexingDestination.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,170 @@
+/**
+ *
+ */
+package org.apache.stanbol.entityhub.indexing.core;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Map;
+
+import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
+import org.apache.stanbol.entityhub.core.query.DefaultQueryFactory;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
+import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
+import org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory;
+import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
+import org.apache.stanbol.entityhub.servicesapi.yard.Yard;
+import org.apache.stanbol.entityhub.servicesapi.yard.YardException;
+/**
+ * Dummy implementation of an {@link IndexingDestination} that writes results
+ * directly into {@link IndexerTest#indexedData}
+ * @author Rupert Westenthaler
+ *
+ */
+public class DummyIndexingDestination implements IndexingDestination {
+
+ Yard yard = new Yard() {
+
+ @Override
+ public Iterable<Representation> update(Iterable<Representation> representations) throws YardException, IllegalArgumentException {
+ Collection<Representation> updated = new ArrayList<Representation>();
+ for(Representation rep : representations){
+ try {
+ updated.add(update(rep));
+ }catch(IllegalArgumentException e){
+ updated.add(null);
+ }
+ }
+ return updated;
+ }
+ @Override
+ public Representation update(Representation represnetation) throws YardException, IllegalArgumentException {
+ if(represnetation == null){
+ return represnetation;
+ }
+ if(IndexerTest.indexedData.containsKey(represnetation.getId())){
+ IndexerTest.indexedData.put(represnetation.getId(), represnetation);
+ } else {
+ throw new IllegalArgumentException("Representation "+represnetation.getId()+" not present in store");
+ }
+ return represnetation;
+ }
+
+ @Override
+ public Iterable<Representation> store(Iterable<Representation> representations) throws NullPointerException,
+ YardException {
+ for(Representation rep : representations){
+ store(rep);
+ }
+ return representations;
+ }
+
+ @Override
+ public Representation store(Representation representation) throws NullPointerException, YardException {
+ if(representation != null){
+ IndexerTest.indexedData.put(representation.getId(), representation);
+ }
+ return representation;
+ }
+
+ @Override
+ public void remove(Iterable<String> ids) throws IllegalArgumentException, YardException {
+ for(String id :ids){
+ remove(id);
+ }
+ }
+
+ @Override
+ public void remove(String id) throws IllegalArgumentException, YardException {
+ IndexerTest.indexedData.remove(id);
+ }
+
+ @Override
+ public boolean isRepresentation(String id) throws YardException, IllegalArgumentException {
+ return IndexerTest.indexedData.containsKey(id);
+ }
+
+ @Override
+ public ValueFactory getValueFactory() {
+ return InMemoryValueFactory.getInstance();
+ }
+
+ @Override
+ public Representation getRepresentation(String id) throws YardException, IllegalArgumentException {
+ return IndexerTest.indexedData.get(id);
+ }
+
+ @Override
+ public FieldQueryFactory getQueryFactory() {
+ return DefaultQueryFactory.getInstance();
+ }
+
+ @Override
+ public String getName() {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public String getId() {
+ return "dummyYard";
+ }
+
+ @Override
+ public String getDescription() {
+ return "Dummy Implementation of the Yard interface for unit testing";
+ }
+
+ @Override
+ public QueryResultList<Representation> findRepresentation(FieldQuery query) throws YardException, IllegalArgumentException {
+ throw new UnsupportedOperationException("I think this is not needed for testing");
+ }
+
+ @Override
+ public QueryResultList<String> findReferences(FieldQuery query) throws YardException, IllegalArgumentException {
+ throw new UnsupportedOperationException("I think this is not needed for testing");
+ }
+
+ @Override
+ public QueryResultList<Representation> find(FieldQuery query) throws YardException, IllegalArgumentException {
+ throw new UnsupportedOperationException("I think this is not needed for testing");
+ }
+
+ @Override
+ public Representation create(String id) throws IllegalArgumentException, YardException {
+ return InMemoryValueFactory.getInstance().createRepresentation(id);
+ }
+
+ @Override
+ public Representation create() throws YardException {
+ return InMemoryValueFactory.getInstance().createRepresentation("urn:"+System.currentTimeMillis()+"-"+Math.random());
+ }
+ };
+ @Override
+ public void finalise() {
+ }
+
+ @Override
+ public Yard getYard() {
+ return yard;
+ }
+
+ @Override
+ public void close() {
+ }
+
+ @Override
+ public void initialise() {
+ }
+
+ @Override
+ public boolean needsInitialisation() {
+ return false;
+ }
+
+ @Override
+ public void setConfiguration(Map<String,Object> config) {
+ }
+
+}
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/DummyIndexingDestination.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,211 @@
+package org.apache.stanbol.entityhub.indexing.core;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.Map.Entry;
+
+import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
+import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
+import org.apache.stanbol.entityhub.servicesapi.model.Reference;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.Text;
+import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
+import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+import org.junit.After;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import static org.junit.Assert.*;
+
+
+public class IndexerTest {
+
+ /**
+ * The number of Entities added to the {@link #testData}<p>
+ * Should be > 100000 to test printing of the Indexing statistics after
+ * 100000 entities.<p>
+ * Note that the source and the indexed entities are kept in memory!
+ */
+ private static final int NUM_ENTITIES = 101000;
+
+ /**
+ * Holds the test data as defined by a static{} block
+ */
+ protected static final Map<String,Representation> testData = new HashMap<String,Representation>();
+ /**
+ * Hold the results of the indexing process
+ */
+ protected static final Map<String,Representation> indexedData = new HashMap<String,Representation>();
+ /**
+ * mvn copies the resources in "src/test/resources" to target/test-classes
+ */
+ private static final String TEST_FOLDER_NAME = "/target/test-classes/indexerTests/";
+ protected static Logger log = LoggerFactory.getLogger(IndexerTest.class);
+ private static String rootDir;
+ private static IndexerFactory factory;
+
+ private static final String DC_TITLE = NamespaceEnum.dcTerms+"title";
+ private static final String DC_CREATED = NamespaceEnum.dcTerms+"created";
+ private static final String DC_CREATOR = NamespaceEnum.dcTerms+"creator";
+ private static final String RDF_TYPE = NamespaceEnum.rdf+"type";
+ private static final String ENTITY_RANK = RdfResourceEnum.signRank.getUri();
+ private static final Set<String> EXPECTED_LANGUAGES = Collections.unmodifiableSet(
+ new HashSet<String>(Arrays.asList("en","de")));
+ private static final float EXPECTED_MAX_RANK = 100;
+ private static final float MAX_INCOMMING = 10000;
+
+ @BeforeClass
+ public static void init(){
+ String baseDir = System.getProperty("basedir");
+ if(baseDir == null){
+ baseDir = System.getProperty("user.dir");
+ }
+ rootDir = baseDir+TEST_FOLDER_NAME;
+ factory = IndexerFactory.getInstance();
+ }
+ @After
+ public void cleanIndexed(){
+ indexedData.clear();
+ }
+
+ @Test
+ public void testDataInteratingMode(){
+ Indexer indexer = factory.create(rootDir+"dataIterating");
+ indexer.index();
+ //check that all entities have been indexed
+ validateAllIndexed();
+ }
+ @Test
+ public void testEntityIdIteratingMode(){
+ Indexer indexer = factory.create(rootDir+"idIterating");
+ indexer.index();
+ //check that all entities have been indexed
+ validateAllIndexed();
+
+ }
+ /**
+ * validate the all the indexed resources!<p>
+ * NOTE: That the asserts expect a specific configuration as provided by the
+ * directory used to create the {@link IndexerFactory} used to initialise
+ * the test.
+ */
+ private void validateAllIndexed() {
+ assertEquals("Number of Indexed Entities "+indexedData.size()+
+ "!= the Number of Source Entities "+NUM_ENTITIES,
+ NUM_ENTITIES,indexedData.size());
+ log.info("Validate Indexing Results:");
+ float maxRank = 0;
+ float minRank = EXPECTED_MAX_RANK;
+ double rankSum = 0;
+ for(Entry<String,Representation> entry : indexedData.entrySet()){
+ assertEquals(entry.getKey(), entry.getValue().getId());
+ float rank = validateIndexed(entry.getValue());
+ if(rank > maxRank){
+ maxRank = rank;
+ }
+ if(rank < minRank){
+ minRank = rank;
+ }
+ rankSum += rank;
+ }
+ log.info("Entity Rank:");
+ log.info(String.format(" - maximum %8.5f",maxRank));
+ log.info(String.format(" - minimum %8.5f",minRank));
+ //expected
+ double expectedAverage = Math.log1p(MAX_INCOMMING/2)*EXPECTED_MAX_RANK/Math.log1p(MAX_INCOMMING);
+ double average = rankSum/NUM_ENTITIES;
+ log.info(String.format(" - average %8.5f (expected %8.5f) ",
+ average, expectedAverage));
+ assertTrue(String.format(
+ "average score %8.5f is more than 5 precent lower than the expeded average %8.5f",
+ average,expectedAverage),
+ average > expectedAverage-(0.05*EXPECTED_MAX_RANK));
+ assertTrue(String.format(
+ "average score %8.5f is more than 5 precent higher than the expeded average %8.5f",
+ average,expectedAverage),
+ average < expectedAverage+(0.05*EXPECTED_MAX_RANK));
+ }
+
+
+ private float validateIndexed(Representation rep) {
+ //first check that the dc-element fields are mapped to dc-terms
+ Object value = rep.getFirst(DC_CREATOR);
+ assertTrue(value instanceof String);
+ value = rep.getFirst(DC_CREATED);
+ assertTrue(value instanceof Date);
+ for(Iterator<Object> types = rep.get(RDF_TYPE);types.hasNext();){
+ value = types.next();
+ assertTrue(value instanceof Reference);
+ assertFalse(((Reference)value).getReference().isEmpty());
+ }
+ for(Iterator<Object> types = rep.get(DC_TITLE);types.hasNext();){
+ value = types.next();
+ assertTrue(value instanceof Text);
+ assertFalse(((Text)value).getText().isEmpty());
+ assertTrue(EXPECTED_LANGUAGES.contains(((Text)value).getLanguage()));
+ }
+ Float rankObject = rep.getFirst(ENTITY_RANK,Float.class);
+ assertNotNull(rankObject);
+ float rank = rankObject.floatValue();
+ assertTrue("Rank"+rank+" > expected maximum "+EXPECTED_MAX_RANK,
+ rank <= EXPECTED_MAX_RANK);
+ assertTrue("Rank"+rank+" < expected maximum "+0,
+ rank >= 0);
+ return rank;
+ }
+ /*
+ * Initialisation of the Test data stored in testData
+ */
+ static{
+ ValueFactory vf = InMemoryValueFactory.getInstance();
+ for(int i=0;i<NUM_ENTITIES;i++){
+ Collection<Text> names = new ArrayList<Text>();
+ Collection<Reference> types = new ArrayList<Reference>();
+ if(i%2==0){
+ if(i%5==0){
+ names.add(vf.createText("City "+i, "en"));
+ names.add(vf.createText("Stadt "+i,"de"));
+ types.add(vf.createReference(NamespaceEnum.dbpediaOnt+"City"));
+ } else if(i%3==0){
+ names.add(vf.createText("Village "+i,"en"));
+ names.add(vf.createText("Gemeinde "+i,"de"));
+ types.add(vf.createReference(NamespaceEnum.dbpediaOnt+"PopulatedPlace"));
+ } else {
+ names.add(vf.createText("Location "+i, "en"));
+ names.add(vf.createText("Platz "+i,"de"));
+ }
+ types.add(vf.createReference(NamespaceEnum.dbpediaOnt+"Place"));
+ } else if(i%3==0){
+ names.add(vf.createText("Person "+i,"en"));
+ names.add(vf.createText("Person "+i,"de"));
+ types.add(vf.createReference(NamespaceEnum.dbpediaOnt+"Person"));
+ } else if(i%5==0){
+ names.add(vf.createText("Organisation "+i,"en"));
+ names.add(vf.createText("Organisation "+i,"de"));
+ types.add(vf.createReference(NamespaceEnum.dbpediaOnt+"Organisation"));
+ } else {
+ names.add(vf.createText("Event "+i,"en"));
+ names.add(vf.createText("Event "+i,"de"));
+ types.add(vf.createReference(NamespaceEnum.dbpediaOnt+"Event"));
+ }
+ Representation rep = vf.createRepresentation("http://www.example.com/entity/test#entity-"+i);
+ rep.add(NamespaceEnum.dcElements+"title", names);
+ rep.add(NamespaceEnum.rdf+"type", types);
+ rep.add(NamespaceEnum.dcElements+"created", new Date());
+ rep.add(NamespaceEnum.dcElements+"creator", IndexerTest.class.getSimpleName());
+ //use a random between [0..{MAX_INCOMMING}] as score
+ Integer incomming = Integer.valueOf((int)Math.round((Math.random()*MAX_INCOMMING)));
+ rep.add(RdfResourceEnum.signRank.getUri(), incomming);
+ testData.put(rep.getId(), rep);
+ }
+ }
+}
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,175 @@
+package org.apache.stanbol.entityhub.indexing.core;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.stanbol.entityhub.indexing.core.source.ResourceImporter;
+import org.apache.stanbol.entityhub.indexing.core.source.ResourceLoader;
+import org.apache.stanbol.entityhub.indexing.core.source.ResourceState;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import static org.junit.Assert.*;
+
+/**
+ * Tests the {@link ResourceLoader} with a single file and with a folder
+ * (with and without processing of the contained archives) by using a
+ * {@link DummyResourceImporter} that derives the returned
+ * {@link ResourceState} from the name of the imported resource.
+ */
+public class ResourceLoaderTest {
+    /**
+     * mvn copies the resources in "src/test/resources" to target/test-classes
+     */
+    private static final String TEST_CONFIGS_ROOT = "/target/test-classes/resourceLoaderTest/";
+    private static final String TEST_FOLDER_NAME = "testFolder/";
+    protected static Logger log = LoggerFactory.getLogger(ResourceLoaderTest.class);
+    private static String rootDir;
+
+    /**
+     * {@link ResourceImporter} that does not read any data. It asserts that
+     * only expected resource names are imported (each at most once) and
+     * decides on the result based on the prefix of the resource name:
+     * "ignore" results in {@link ResourceState#IGNORED}, "error" in an
+     * {@link IOException} and everything else in {@link ResourceState#LOADED}.
+     */
+    public static class DummyResourceImporter implements ResourceImporter {
+
+        /**
+         * The names of the expected resources that were not yet imported
+         */
+        final Collection<String> expectedNames;
+        /**
+         * @param expectedResource the paths (or names) of the resources
+         * expected to be imported. Only the file name of each entry is used.
+         */
+        DummyResourceImporter(Collection<String> expectedResource){
+            this.expectedNames = new HashSet<String>();
+            for(String resource : expectedResource){
+                //this works only if there are not two files with the same name
+                //so assert that each name is added for the first time
+                String name = FilenameUtils.getName(resource);
+                assertTrue("This Test requires that there are no files with the same name!",
+                    expectedNames.add(name));
+            }
+        }
+        /**
+         * Validates the parameters and removes the resource name from the
+         * expected names. The stream is closed in any case, even if one of
+         * the assertions fails.
+         * @param is the stream of the resource (closed without reading)
+         * @param resourceName the name of the resource
+         * @return {@link ResourceState#IGNORED} if the name starts with
+         * "ignore", otherwise {@link ResourceState#LOADED}
+         * @throws IOException if the name starts with "error"
+         */
+        @Override
+        public ResourceState importResource(InputStream is, String resourceName) throws IOException {
+            try {
+                assertNotNull(is);
+                assertNotNull(resourceName);
+                assertFalse(resourceName.isEmpty());
+                assertTrue("resourceName '"+resourceName+"' not expected",
+                    expectedNames.remove(resourceName));
+            } finally {
+                //null-safe; ensures a failing assertion does not leak the stream
+                IOUtils.closeQuietly(is);
+            }
+            log.debug("Import Resource {}",resourceName);
+            if(resourceName.startsWith("ignore")){
+                return ResourceState.IGNORED;
+            } else if(resourceName.startsWith("error")){
+                throw new IOException("To test an Error");
+            } else {
+                return ResourceState.LOADED;
+            }
+        }
+        /**
+         * Asserts that all expected resources were imported
+         */
+        public void checkAllProcessed(){
+            assertTrue(expectedNames.isEmpty());
+        }
+
+    }
+
+    /**
+     * Resolves the root folder of the test resources relative to the
+     * "basedir" system property (falling back to "user.dir").
+     */
+    @BeforeClass
+    public static void init(){
+        String baseDir = System.getProperty("basedir");
+        if(baseDir == null){
+            baseDir = System.getProperty("user.dir");
+        }
+        rootDir = baseDir+TEST_CONFIGS_ROOT;
+    }
+
+    /**
+     * The paths of all resources directly contained in the test folder
+     * @param folder the path of the test folder including the trailing separator
+     * @return a new, modifiable set with the paths of the resources
+     */
+    private static Collection<String> folderResources(String folder){
+        return new HashSet<String>(Arrays.asList(
+            folder+"archiveInFolder.zip",
+            folder+"archiveWithIgnore.zip",
+            folder+"archiveWithError.zip",
+            folder+"errorFileInFolder.txt",
+            folder+"fileInFolder.txt",
+            folder+"ignoreFileInFolder.txt",
+            folder+"otherFileInFolder.txt"));
+    }
+
+    @Test
+    public void testSingleFile(){
+        String singleFile = rootDir+"singleFileTest.txt";
+        DummyResourceImporter importer = new DummyResourceImporter(
+            Arrays.asList(singleFile));
+        ResourceLoader loader = new ResourceLoader(importer, false,
+            new File(rootDir,"singleFileTest.txt"));
+        //before loading the file MUST BE registered
+        assertEquals(new HashSet<String>(Arrays.asList(singleFile)),
+            loader.getResources(ResourceState.REGISTERED));
+        assertTrue(loader.getResources(ResourceState.ERROR).isEmpty());
+        assertTrue(loader.getResources(ResourceState.LOADED).isEmpty());
+        assertTrue(loader.getResources(ResourceState.IGNORED).isEmpty());
+        loader.loadResources();
+        //after loading the file MUST BE loaded
+        assertEquals(new HashSet<String>(Arrays.asList(singleFile)),
+            loader.getResources(ResourceState.LOADED));
+        assertTrue(loader.getResources(ResourceState.REGISTERED).isEmpty());
+        assertTrue(loader.getResources(ResourceState.IGNORED).isEmpty());
+        assertTrue(loader.getResources(ResourceState.ERROR).isEmpty());
+        importer.checkAllProcessed();
+    }
+    @Test
+    public void testFolderWithoutProcessingArchives(){
+        String folder = rootDir+TEST_FOLDER_NAME;
+        Collection<String> expectedFolderResources = folderResources(folder);
+        DummyResourceImporter importer = new DummyResourceImporter(
+            expectedFolderResources);
+        ResourceLoader loader = new ResourceLoader(importer, false,
+            new File(rootDir,"testFolder"));
+        assertEquals(expectedFolderResources, loader.getResources(ResourceState.REGISTERED));
+        assertTrue(loader.getResources(ResourceState.ERROR).isEmpty());
+        assertTrue(loader.getResources(ResourceState.LOADED).isEmpty());
+        assertTrue(loader.getResources(ResourceState.IGNORED).isEmpty());
+        loader.loadResources();
+        //archives are not processed, so they are imported like normal files
+        assertEquals(new HashSet<String>(Arrays.asList(
+            folder+"archiveInFolder.zip", folder+"fileInFolder.txt",
+            folder+"otherFileInFolder.txt",folder+"archiveWithIgnore.zip",
+            folder+"archiveWithError.zip")),
+            loader.getResources(ResourceState.LOADED));
+        assertTrue(loader.getResources(ResourceState.REGISTERED).isEmpty());
+        assertEquals(new HashSet<String>(Arrays.asList(
+            folder+"errorFileInFolder.txt")),
+            loader.getResources(ResourceState.ERROR));
+        assertEquals(new HashSet<String>(Arrays.asList(
+            folder+"ignoreFileInFolder.txt")),
+            loader.getResources(ResourceState.IGNORED));
+        //NOTE(review): unlike testSingleFile() this test does not call
+        //importer.checkAllProcessed() - confirm whether this is intended
+    }
+    @Test
+    public void testFolderWithProcessingArchives(){
+        String folder = rootDir+TEST_FOLDER_NAME;
+        Collection<String> expectedResources = folderResources(folder);
+        //the resourceNames sent to the importer are now different because the
+        //archives are processed and the entries are sent to the ResourceImporter
+        Collection<String> expectedResourceNames = Arrays.asList(
+            "fileInArchive.txt", //part of archiveInFolder.zip
+            "otherFileInArchive.txt", //part of archiveInFolder.zip
+            "ignoreFileInArchive.txt", //part of archiveWithIgnore.zip
+            "errorFileInArchive.txt", //part of archiveWithError.zip
+            "errorFileInFolder.txt",
+            "fileInFolder.txt",
+            "ignoreFileInFolder.txt",
+            "otherFileInFolder.txt");
+        DummyResourceImporter importer = new DummyResourceImporter(
+            expectedResourceNames);
+        ResourceLoader loader = new ResourceLoader(importer, true,
+            new File(rootDir,TEST_FOLDER_NAME));
+
+        assertEquals(expectedResources, loader.getResources(ResourceState.REGISTERED));
+        assertTrue(loader.getResources(ResourceState.ERROR).isEmpty());
+        assertTrue(loader.getResources(ResourceState.LOADED).isEmpty());
+        assertTrue(loader.getResources(ResourceState.IGNORED).isEmpty());
+        loader.loadResources();
+        assertEquals(new HashSet<String>(Arrays.asList(
+            folder+"archiveInFolder.zip",
+            folder+"archiveWithIgnore.zip", //ignored files in archives are OK
+            folder+"fileInFolder.txt",folder+"otherFileInFolder.txt")),
+            loader.getResources(ResourceState.LOADED));
+        assertTrue(loader.getResources(ResourceState.REGISTERED).isEmpty());
+        assertEquals(new HashSet<String>(Arrays.asList(
+            folder+"errorFileInFolder.txt",
+            folder+"archiveWithError.zip")), //archive with errors MUST be ERROR
+            loader.getResources(ResourceState.ERROR));
+        assertEquals(new HashSet<String>(Arrays.asList(
+            folder+"ignoreFileInFolder.txt")),
+            loader.getResources(ResourceState.IGNORED));
+        //NOTE(review): unlike testSingleFile() this test does not call
+        //importer.checkAllProcessed() - confirm whether this is intended
+    }
+}
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexerTestMappings.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexerTestMappings.txt?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexerTestMappings.txt (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexerTestMappings.txt Fri Apr 29 09:20:31 2011
@@ -0,0 +1,15 @@
+# --- Define the Languages for all fields ---
+| @=null;en;de;fr;it
+
+# --- RDF, RDFS and OWL Mappings ---
+rdf:*
+rdfs:*
+# convert rdf:type statements to References
+rdf:type | d=entityhub:ref
+
+# --- Dublin Core (dc terms and dc elements) ---
+dc:*
+# convert DC Elements to dc namespace
+dc-elements:title > dc:title
+dc-elements:created > dc:created
+dc-elements:creator > dc:creator
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexerTestMappings.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexing.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexing.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexing.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexing.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,27 @@
+name=test
+description=Indexer Test Configuration
+
+#used to iterate over the entity data
+entityDataIterable=org.apache.stanbol.entityhub.indexing.core.DummyEntityDataSource
+
+#used to provide entity data for a given id (not used for this test)
+#entityDataProvider=org.apache.stanbol.entityhub.indexing.core.DummyEntityDataSource
+
+#used to normalize scores
+scoreNormalizer=org.apache.stanbol.entityhub.indexing.core.normaliser.RangeNormaliser,config:range;org.apache.stanbol.entityhub.indexing.core.normaliser.NaturalLogNormaliser
+
+#used to iterate over Entities (not used by this test)
+#entityIdIterator=org.apache.stanbol.entityhub.indexing.core.LineBasedEntityIterator,source:testEntityScore.txt,charset:UTF-8,encodeIds:false
+
+#used to provide the score for Entities
+entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.EntityFieldScoreProvider
+
+#used to process indexed Entities
+#will use the mappings configured for "fieldConfiguration" if not otherwise specified
+entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor
+
+#used to store the configuration of the index within the yard
+fieldConfiguration=indexerTestMappings.txt
+
+#The destination responsible to store the indexed entities
+indexingDestination=org.apache.stanbol.entityhub.indexing.core.DummyIndexingDestination
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/indexing.properties
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/minIncomming.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/minIncomming.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/minIncomming.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/minIncomming.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,2 @@
+inclusive=true
+min-score=2
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/minIncomming.properties
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/range.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/range.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/range.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/range.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,4 @@
+#this value is normalized by all chained normalizers
+max-expected-score=10000
+#the upper bound of the resulting range [0..{upper-bound}]
+upper-bound=100
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/dataIterating/indexing/config/range.properties
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexerTestMappings.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexerTestMappings.txt?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexerTestMappings.txt (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexerTestMappings.txt Fri Apr 29 09:20:31 2011
@@ -0,0 +1,15 @@
+# --- Define the Languages for all fields ---
+| @=null;en;de;fr;it
+
+# --- RDF, RDFS and OWL Mappings ---
+rdf:*
+rdfs:*
+# convert rdf:type statements to References
+rdf:type | d=entityhub:ref
+
+# --- Dublin Core (dc terms and dc elements) ---
+dc:*
+# convert DC Elements to dc namespace
+dc-elements:title > dc:title
+dc-elements:created > dc:created
+dc-elements:creator > dc:creator
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexerTestMappings.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexing.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexing.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexing.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexing.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,27 @@
+name=test
+description=Indexer Test Configuration
+
+#used to iterate over the entity data (not used for this test)
+#entityDataIterable=org.apache.stanbol.entityhub.indexing.core.DummyEntityDataSource
+
+#used to provide entity data for a given id
+entityDataProvider=org.apache.stanbol.entityhub.indexing.core.DummyEntityDataSource
+
+#used to normalize scores
+scoreNormalizer=org.apache.stanbol.entityhub.indexing.core.normaliser.RangeNormaliser,config:range;org.apache.stanbol.entityhub.indexing.core.normaliser.NaturalLogNormaliser
+
+#used to iterate over Entities
+entityIdIterator=org.apache.stanbol.entityhub.indexing.core.DummyEntityIdSource
+
+#used to provide the score for Entities
+#entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.EntityFieldScoreProvider
+
+#used to process indexed Entities
+#will use the mappings configured for "fieldConfiguration" if not otherwise specified
+entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor
+
+#used to store the configuration of the index within the yard
+fieldConfiguration=indexerTestMappings.txt
+
+#The destination responsible to store the indexed entities
+indexingDestination=org.apache.stanbol.entityhub.indexing.core.DummyIndexingDestination
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/indexing.properties
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/minIncomming.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/minIncomming.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/minIncomming.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/minIncomming.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,2 @@
+inclusive=true
+min-score=2
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/minIncomming.properties
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/range.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/range.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/range.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/range.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,4 @@
+#this value is normalized by all chained normalizers
+max-expected-score=10000
+#the upper bound of the resulting range [0..{upper-bound}]
+upper-bound=100
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/indexerTests/idIterating/indexing/config/range.properties
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/singleFileTest.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/singleFileTest.txt?rev=1097740&view=auto
==============================================================================
(empty)
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/singleFileTest.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveInFolder.zip
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveInFolder.zip?rev=1097740&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveInFolder.zip
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveWithError.zip
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveWithError.zip?rev=1097740&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveWithError.zip
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveWithIgnore.zip
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveWithIgnore.zip?rev=1097740&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/archiveWithIgnore.zip
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/errorFileInFolder.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/errorFileInFolder.txt?rev=1097740&view=auto
==============================================================================
(empty)
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/errorFileInFolder.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/fileInFolder.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/fileInFolder.txt?rev=1097740&view=auto
==============================================================================
(empty)
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/fileInFolder.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/ignoreFileInFolder.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/ignoreFileInFolder.txt?rev=1097740&view=auto
==============================================================================
(empty)
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/ignoreFileInFolder.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/otherFileInFolder.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/otherFileInFolder.txt?rev=1097740&view=auto
==============================================================================
(empty)
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/resourceLoaderTest/testFolder/otherFileInFolder.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/dbPediaMappings.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/dbPediaMappings.txt?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/dbPediaMappings.txt (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/dbPediaMappings.txt Fri Apr 29 09:20:31 2011
@@ -0,0 +1,61 @@
+# --- Define the Languages for all fields ---
+| @=null;en;de;fr;it
+
+# --- RDF, RDFS and OWL Mappings ---
+rdfs:label
+rdfs:comment
+rdf:type | d=entityhub:ref
+# used by LOD to link to URIs used to identify the same Entity
+owl:sameAs | d=entityhub:ref
+
+# --- Dublin Core (dc terms and dc elements) ---
+dc:*
+# all DC Elements (one could also define the mappings to the DC Terms counterparts here)
+dc-elements:*
+
+# --- Spatial Things ---
+geo:lat | d=xsd:double
+geo:long | d=xsd:double
+geo:alt | d=xsd:int;xsd:float
+# one can also copy the values from the DBpedia properties
+#dbp-prop:latitude | d=xsd:decimal > geo:lat
+#dbp-prop:longitude | d=xsd:decimal > geo:long
+
+# --- Thesaurus (via SKOS) ---
+#SKOS can be used to define hierarchical terminologies
+skos:*
+skos:broader | d=entityhub:ref
+skos:narrower | d=entityhub:ref
+skos:related | d=entityhub:ref
+skos:member | d=entityhub:ref
+skos:subject | d=entityhub:ref
+skos:inScheme | d=entityhub:ref
+skos:hasTopConcept | d=entityhub:ref
+skos:topConceptOf | d=entityhub:ref
+
+# --- Social Networks (via foaf) ---
+#The Friend of a Friend schema often used to describe social relations between people
+foaf:*
+foaf:knows | d=entityhub:ref
+foaf:made | d=entityhub:ref
+foaf:maker | d=entityhub:ref
+foaf:member | d=entityhub:ref
+foaf:homepage | d=xsd:anyURI
+# also use the DBpedia property website for foaf:homepage!
+dbp-prop:website | d=xsd:anyURI > foaf:homepage
+foaf:depiction | d=xsd:anyURI
+# also use the DBpedia thumbnail as foaf:depiction
+dbp-ont:thumbnail | d=xsd:anyURI > foaf:depiction
+foaf:img | d=xsd:anyURI
+foaf:logo | d=xsd:anyURI
+# Documents about the entity
+foaf:page | d=xsd:anyURI
+
+# --- dbpedia specific
+# the "dbp-ont" defines knowledge mapped to the DBpedia ontology
+dbp-ont:*
+# the DBpedia properties ("dbp-prop") are all key-value pairs extracted from the info boxes
+# on the right hand side of Wikipedia pages.
+#dbp-prop:*
+# Copy only population for now (one could add additional if necessary)!
+dbp-prop:population | d=xsd:integer
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/dbPediaMappings.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexFieldConfig.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexFieldConfig.txt?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexFieldConfig.txt (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexFieldConfig.txt Fri Apr 29 09:20:31 2011
@@ -0,0 +1,28 @@
+#This is a similar version of the dbPediaMappings.txt but excludes mapping rules
+# --- Define the Languages for all fields ---
+| @=null;en;de;fr;it
+
+# --- RDF, RDFS and OWL Mappings ---
+rdfs:label
+rdfs:comment
+rdf:type
+# used by LOD to link to URIs used to identify the same Entity
+owl:sameAs
+
+# --- Other Namespaces to include---
+dc:*
+dc-elements:*
+geo:*
+skos:*
+foaf:*
+
+# --- dbpedia specific
+# the "dbp-ont" defines knowledge mapped to the DBpedia ontology
+dbp-ont:*
+
+# the DBpedia properties ("dbp-prop") are all key-value pairs extracted from the info boxes
+# on the right hand side of Wikipedia pages.
+# uncomment the next line to include all
+#dbp-prop:*
+# Currently only the population is added
+dbp-prop:population
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexFieldConfig.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexing.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexing.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexing.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexing.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,9 @@
+name=simple
+description=Simple Configuration
+
+scoreNormalizer=org.apache.stanbol.entityhub.indexing.core.normaliser.RangeNormaliser,config:range;org.apache.stanbol.entityhub.indexing.core.normaliser.NaturalLogNormaliser;org.apache.stanbol.entityhub.indexing.core.normaliser.MinScoreNormalizer,config:minIncomming
+
+entityIdIterator=org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator,source:testEntityScore.txt,charset:UTF-8,encodeIds:false
+
+entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor,mappings:dbPediaMappings.txt
+fieldConfiguration=indexFieldConfig.txt
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/indexing.properties
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/minIncomming.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/minIncomming.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/minIncomming.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/minIncomming.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,2 @@
+inclusive=true
+min-score=2
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/minIncomming.properties
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/range.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/range.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/range.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/range.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1 @@
+upper-bound=10
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/config/range.properties
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/resources/testEntityScore.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/resources/testEntityScore.txt?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/resources/testEntityScore.txt (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/resources/testEntityScore.txt Fri Apr 29 09:20:31 2011
@@ -0,0 +1,3 @@
+http://www.example.org/entity/test 100
+http://www.example.org/entity/test2 10
+http://www.example.org/entity/test3 1
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/test/resources/testConfigs/simple/indexing/resources/testEntityScore.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: incubator/stanbol/trunk/entityhub/indexing/dblp/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/dblp/pom.xml?rev=1097740&r1=1097739&r2=1097740&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/dblp/pom.xml (original)
+++ incubator/stanbol/trunk/entityhub/indexing/dblp/pom.xml Fri Apr 29 09:20:31 2011
@@ -19,14 +19,17 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
+<!-- Do not use a parent because of problems with missing dependencies with
+ mvn assembly:assembly
<parent>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.entityhub.parent</artifactId>
<version>0.9-SNAPSHOT</version>
<relativePath>../../parent</relativePath>
- </parent>
+ </parent> -->
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.entityhub.indexing.dblp</artifactId>
+ <version>0.9-SNAPSHOT</version>
<packaging>jar</packaging>
<name>Apache Stanbol Entityhub Indexing for dblp</name>
<description>This uses the RDF dump provided by
Modified: incubator/stanbol/trunk/entityhub/indexing/dbpedia/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/dbpedia/pom.xml?rev=1097740&r1=1097739&r2=1097740&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/dbpedia/pom.xml (original)
+++ incubator/stanbol/trunk/entityhub/indexing/dbpedia/pom.xml Fri Apr 29 09:20:31 2011
@@ -19,7 +19,9 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
-<!-- <parent>
+<!-- Do not use a parent because of problems with missing dependencies with
+ mvn assembly:assembly
+ <parent>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.entityhub.parent</artifactId>
<version>0.9-SNAPSHOT</version>