You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/03/28 13:02:23 UTC
svn commit: r1306251 - in /incubator/stanbol/trunk/entityhub: indexing/core/
indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/
indexing/genericrdf/
indexing/genericrdf/src/main/resources/indexing/config/ ldpath/src/main/...
Author: rwesten
Date: Wed Mar 28 11:02:23 2012
New Revision: 1306251
URL: http://svn.apache.org/viewvc?rev=1306251&view=rev
Log:
STANBOL-556: Adds support for LDpath processor to the Entityhub Indexing tools
* added a SingleRepresentationBackend (LDPath Backend for a single Representation)
* added the processor implementation
* added documentation for the new processor to the indexing.properties of the genericrdf indexer
other:
* improved documentation for the EntityTypeFilter
* added a default configuration for the EntityTypeFilter
Added:
incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathProcessor.java (with props)
incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/resources/indexing/config/entityTypes.properties (with props)
incubator/stanbol/trunk/entityhub/ldpath/src/main/java/org/apache/stanbol/entityhub/ldpath/backend/SingleRepresentationBackend.java (with props)
Modified:
incubator/stanbol/trunk/entityhub/indexing/core/pom.xml
incubator/stanbol/trunk/entityhub/indexing/genericrdf/pom.xml
incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/resources/indexing/config/indexing.properties
incubator/stanbol/trunk/entityhub/ldpath/src/main/java/org/apache/stanbol/entityhub/ldpath/EntityhubLDPath.java
incubator/stanbol/trunk/entityhub/ldpath/src/test/java/org/apache/stanbol/entityhub/ldpath/backend/BackendTest.java
Modified: incubator/stanbol/trunk/entityhub/indexing/core/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/pom.xml?rev=1306251&r1=1306250&r2=1306251&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/pom.xml (original)
+++ incubator/stanbol/trunk/entityhub/indexing/core/pom.xml Wed Mar 28 11:02:23 2012
@@ -98,6 +98,10 @@
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.entityhub.core</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.entityhub.ldpath</artifactId>
+ </dependency>
<dependency> <!-- for the Main (command line utility) -->
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
Added: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathProcessor.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathProcessor.java?rev=1306251&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathProcessor.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathProcessor.java Wed Mar 28 11:02:23 2012
@@ -0,0 +1,139 @@
+package org.apache.stanbol.entityhub.indexing.core.processor;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
+import org.apache.stanbol.entityhub.indexing.core.EntityProcessor;
+import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
+import org.apache.stanbol.entityhub.ldpath.EntityhubLDPath;
+import org.apache.stanbol.entityhub.ldpath.backend.SingleRepresentationBackend;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import at.newmedialab.ldpath.exception.LDPathParseException;
+import at.newmedialab.ldpath.model.programs.Program;
+
+/**
+ * {@link EntityProcessor} that executes an LDPath program on every processed
+ * {@link Representation}.
+ * <p>
+ * The program is parsed from the file configured via {@link #PARAMETER_LD_PATH}
+ * when {@link #setConfiguration(Map)} is called. Depending on
+ * {@link #PARAMETER_APPEND} the results of the program are either added to the
+ * processed Representation or returned in place of it.
+ */
+public class LdpathProcessor implements EntityProcessor {
+
+    private static final Logger log = LoggerFactory.getLogger(LdpathProcessor.class);
+    /**
+     * The reference to the file containing the LDPath statement used for
+     * processing. The path is evaluated relative to the config directory
+     * of the indexing
+     */
+    public static final String PARAMETER_LD_PATH = "ldpath";
+    /**
+     * If results of the LDPath transformation are appended to the incoming
+     * representation, or if the incoming Representation is replaced by the
+     * results of the LDPath program (default is append).
+     */
+    public static final String PARAMETER_APPEND = "append";
+    /**
+     * By default appending of LDPath results to the parsed Representation is
+     * activated
+     */
+    private static final boolean DEFAULT_APPEND_MODE = true;
+
+    private final ValueFactory vf;
+    private final EntityhubLDPath ldPath;
+    /** Backend the LDPath program is evaluated against; holds the Representation currently processed. */
+    private final SingleRepresentationBackend backend;
+    /** The parsed LDPath program; <code>null</code> until {@link #setConfiguration(Map)} succeeded. */
+    private Program<Object> program;
+    private boolean appendMode;
+
+    public LdpathProcessor(){
+        vf = InMemoryValueFactory.getInstance();
+        this.backend = new SingleRepresentationBackend(vf);
+        this.ldPath = new EntityhubLDPath(backend);
+    }
+
+    @Override
+    public boolean needsInitialisation() {
+        return false;
+    }
+
+    @Override
+    public void initialise() {
+    }
+
+    @Override
+    public void close() {
+    }
+
+    /**
+     * Executes the configured LDPath program on the parsed Representation.
+     * @param source the Representation to process
+     * @return <code>null</code> if <code>null</code> was parsed. In append mode
+     * the parsed Representation with the values of all result fields added;
+     * otherwise the Representation holding the results of the LDPath program.
+     * @throws IllegalStateException if no LDPath program is available because
+     * {@link #setConfiguration(Map)} was not (successfully) called
+     */
+    @Override
+    public Representation process(Representation source) {
+        if(source == null){
+            return null;
+        }
+        if(program == null){
+            //fail with a clear message instead of an NPE during execution
+            throw new IllegalStateException("No LDPath program available! "
+                + "setConfiguration(..) MUST BE called before processing Representations.");
+        }
+        backend.setRepresentation(source);
+        Representation result = ldPath.execute(vf.createReference(source.getId()), program);
+        if(!appendMode){
+            return result;
+        }
+        for(Iterator<String> fields = result.getFieldNames(); fields.hasNext();){
+            String field = fields.next();
+            source.add(field, result.get(field));
+        }
+        return source;
+    }
+
+    /**
+     * Reads the configuration of this processor: the required
+     * {@link #PARAMETER_LD_PATH} file is parsed as LDPath program and the
+     * optional {@link #PARAMETER_APPEND} flag is evaluated.
+     * @param config the configuration
+     * @throws IllegalArgumentException if {@link #PARAMETER_LD_PATH} is missing,
+     * the IndexingConfig is missing or the configured file does not exist or
+     * is not a file
+     * @throws IllegalStateException if the configured file can not be read or
+     * parsed
+     */
+    @Override
+    public void setConfiguration(Map<String,Object> config) {
+        File ldpathFile = resolveLdpathFile(config);
+        this.program = readProgram(ldpathFile);
+        this.appendMode = parseAppendMode(config.get(PARAMETER_APPEND));
+    }
+
+    /**
+     * Looks up the file configured by {@link #PARAMETER_LD_PATH} via the
+     * IndexingConfig and validates that it is an existing file.
+     */
+    private File resolveLdpathFile(Map<String,Object> config) {
+        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
+        Object value = config.get(PARAMETER_LD_PATH);
+        if(value == null || value.toString().isEmpty()){
+            throw new IllegalArgumentException("Missing required configuration '"
+                + PARAMETER_LD_PATH +"' - the file containing the LDPath program used by this "
+                + LdpathProcessor.class.getSimpleName()+"!");
+        }
+        if(indexingConfig == null){
+            throw new IllegalArgumentException("Missing required '"
+                + IndexingConfig.KEY_INDEXING_CONFIG +"' needed to lookup the configured '"
+                + PARAMETER_LD_PATH +"' file!");
+        }
+        File ldpathFile = indexingConfig.getConfigFile(value.toString());
+        if(ldpathFile == null || !ldpathFile.exists()){
+            throw new IllegalArgumentException("Configured '"
+                + PARAMETER_LD_PATH +"' file was not found!");
+        }
+        if(!ldpathFile.isFile()){
+            throw new IllegalArgumentException("Configured '"
+                + PARAMETER_LD_PATH +"' file exists but is not a File!");
+        }
+        return ldpathFile;
+    }
+
+    /**
+     * Parses the LDPath program from the parsed file (read as UTF-8) and logs
+     * the parsed program.
+     */
+    private Program<Object> readProgram(File ldpathFile) {
+        Reader in = null;
+        try {
+            in = new InputStreamReader(new FileInputStream(ldpathFile), Charset.forName("UTF-8"));
+            Program<Object> parsed = ldPath.parseProgram(in);
+            log.info("ldpath program: \n{}\n",parsed.getPathExpression(backend));
+            return parsed;
+        } catch (IOException e) {
+            throw new IllegalStateException("Unable to read LDPath program from configured file '"
+                + ldpathFile +"'!",e);
+        } catch (LDPathParseException e) {
+            throw new IllegalStateException("Unable to parse LDPath program from configured file '"
+                + ldpathFile +"'!",e);
+        } finally {
+            IOUtils.closeQuietly(in);
+        }
+    }
+
+    /**
+     * Evaluates the value of {@link #PARAMETER_APPEND}: Boolean values are used
+     * as is, other non-empty values are parsed from their string form and
+     * missing/empty values result in {@link #DEFAULT_APPEND_MODE}.
+     */
+    private static boolean parseAppendMode(Object value) {
+        if(value instanceof Boolean){
+            return ((Boolean) value).booleanValue();
+        }
+        if(value != null && !value.toString().isEmpty()){
+            return Boolean.parseBoolean(value.toString());
+        }
+        return DEFAULT_APPEND_MODE;
+    }
+
+}
Propchange: incubator/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathProcessor.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: incubator/stanbol/trunk/entityhub/indexing/genericrdf/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/genericrdf/pom.xml?rev=1306251&r1=1306250&r2=1306251&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/genericrdf/pom.xml (original)
+++ incubator/stanbol/trunk/entityhub/indexing/genericrdf/pom.xml Wed Mar 28 11:02:23 2012
@@ -328,11 +328,11 @@
<artifactId>commons-fileupload</artifactId>
<scope>runtime</scope>
</dependency>
- <dependency>
+ <!-- already included <dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<scope>runtime</scope>
- </dependency>
+ </dependency> -->
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
Added: incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/resources/indexing/config/entityTypes.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/resources/indexing/config/entityTypes.properties?rev=1306251&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/resources/indexing/config/entityTypes.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/resources/indexing/config/entityTypes.properties Wed Mar 28 11:02:23 2012
@@ -0,0 +1,26 @@
+#Configuration for the FieldValueFilter
+
+#This can be used to configure specific rdf:types to be indexed. Entities with
+#other types will be filtered and not be included in the local DBpedia.org
+#index
+
+#How to configure
+
+#The key 'field' can be used to configure the field the filters are applied
+# - 'rdf:type' is used as default for the field
+# - Only a single field is supported. However one can configure multiple instances
+# with different configurations in the 'indexing.properties' file.
+# - It is possible to use a full URI or prefix:localname for all prefixes registered
+# in 'org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum'
+
+#field=rdf:type
+
+#The key 'values' is used to specify the filter
+# - This is a required configuration.
+# - The value '*' deactivates filtering
+# - Multiple types are supported. Configurations are separated by ';'
+# - It is possible to use full URIs or prefix:localname for all prefixes registered
+# in 'org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum'
+
+#This deactivates filtering
+values=*
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/resources/indexing/config/entityTypes.properties
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/resources/indexing/config/indexing.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/resources/indexing/config/indexing.properties?rev=1306251&r1=1306250&r2=1306251&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/resources/indexing/config/indexing.properties (original)
+++ incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/resources/indexing/config/indexing.properties Wed Mar 28 11:02:23 2012
@@ -91,17 +91,63 @@ entityScoreProvider=org.apache.stanbol.e
# Entity Processor
-# Currently the only available implementation is the FiledMapperProcessor.
-entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor
+# Multiple Entity processors can be used for indexing entities. They are separated by ';'
+# and are executed in the order of definition.
+
+# FiledMapperProcessor:
+#
+# entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor
+#
+# This processor ensures that "field mappings" are executed while indexing entities.
# By default it will use the mappings configured by the "fieldConfiguraton"
# property. To use other mappings one can use the "mappings" parameter (e.g.
# mappings:otherMappings.txt
-# A default mapping configuration is provided. This file also includes a lot of
-# comments.
+# FieldValueFilter
+#
+#entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FieldValueFilter,config:entityTypes
+#
+# This allows to define a field and values that are used to filter entities. Only Entities
+# that do have one of the defined values as actual value of the defined field will
+# get indexed. This is typically used to filter entities by rdf:type, but can be used
+# for any URI property. See the default entityTypes.properties file for more information
+
+# LdpathProcessor
+#
+# This allows to use simple LDpath statements to process entities. Such as mapping
+# only properties of entities with a specific type
+#
+# skos:prefLabel = .[rdf:type is <http://example.org/MyType>]/rdfs:label;
+#
+# Parameters:
+# * append:[true/false] (default=true) If the result of the LDpath program is
+# appended to the processed entity or if the processed entity should be
+# replaced with the results of the LDpath program
+# * ldpath:{file} (required, no default) The {file} containing the LDpath
+# program used by this processor. {file} is relative to the config directory.
+#
+# NOTEs:
+# * The LdpathProcessor has only access to the local properties of the currently
+# indexed entity. LDPath statements that refer to other information, such as paths
+# with a length > 1 or inverse properties, will not work
+# * Processors can be chained by defining multiple Processor instances in the
+# configuration and separating them with ';'. This allows to use multiple
+# LdpathProcessor instances and/or to chain LdpathProcessor(s) with others
+# such as the "FiledMapperProcessor". Processors are executed as defined
+# within the configuration of the "entityProcessor" property.
+# * When using the FiledMapperProcessor on results of the LdpathProcessor make
+# sure that the fields defined in the LDpath statements are indexed by the
+# FiledMapperProcessor. Otherwise such values will NOT be indexed!
+# org.apache.stanbol.entityhub.indexing.core.processor.LdpathProcessor,ldpath:ldpath-mapping.txt,append:true
-# to deactivate EntityProcessing one can use
+# EmptyProcessor
+#
#entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.EmptyProcessor
+#
+# This processor can be used to deactivate EntityProcessing
+
+# Default Entity Processor configuration
+entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FieldValueFilter,config:entityTypes;org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor
# Index Field Configuration
Modified: incubator/stanbol/trunk/entityhub/ldpath/src/main/java/org/apache/stanbol/entityhub/ldpath/EntityhubLDPath.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/ldpath/src/main/java/org/apache/stanbol/entityhub/ldpath/EntityhubLDPath.java?rev=1306251&r1=1306250&r2=1306251&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/ldpath/src/main/java/org/apache/stanbol/entityhub/ldpath/EntityhubLDPath.java (original)
+++ incubator/stanbol/trunk/entityhub/ldpath/src/main/java/org/apache/stanbol/entityhub/ldpath/EntityhubLDPath.java Wed Mar 28 11:02:23 2012
@@ -16,6 +16,8 @@
*/
package org.apache.stanbol.entityhub.ldpath;
+import java.util.Collection;
+
import org.apache.stanbol.entityhub.core.mapping.ValueConverterFactory.AnyUriConverter;
import org.apache.stanbol.entityhub.core.mapping.ValueConverterFactory.ReferenceConverter;
import org.apache.stanbol.entityhub.core.mapping.ValueConverterFactory.TextConverter;
@@ -111,7 +113,10 @@ public class EntityhubLDPath extends LDP
}
Representation result = vf.createRepresentation(context.getReference());
for(FieldMapping<?,Object> mapping : program.getFields()) {
- result.add(mapping.getFieldName(),mapping.getValues(backend,context));
+ Collection<?> values = mapping.getValues(backend,context);
+ if(values !=null && !values.isEmpty()){
+ result.add(mapping.getFieldName(),values);
+ }
}
return result;
Added: incubator/stanbol/trunk/entityhub/ldpath/src/main/java/org/apache/stanbol/entityhub/ldpath/backend/SingleRepresentationBackend.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/ldpath/src/main/java/org/apache/stanbol/entityhub/ldpath/backend/SingleRepresentationBackend.java?rev=1306251&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/ldpath/src/main/java/org/apache/stanbol/entityhub/ldpath/backend/SingleRepresentationBackend.java (added)
+++ incubator/stanbol/trunk/entityhub/ldpath/src/main/java/org/apache/stanbol/entityhub/ldpath/backend/SingleRepresentationBackend.java Wed Mar 28 11:02:23 2012
@@ -0,0 +1,70 @@
+package org.apache.stanbol.entityhub.ldpath.backend;
+
+import java.util.Collections;
+
+import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
+import org.apache.stanbol.entityhub.core.query.FieldQueryImpl;
+import org.apache.stanbol.entityhub.core.query.QueryResultListImpl;
+import org.apache.stanbol.entityhub.servicesapi.EntityhubException;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
+import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
+import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
+
+/**
+ * Allows to execute ldpath on the data of a single Representation. Will not
+ * support paths longer than <code>1</code> but might still be very useful
+ * to select/filter specific values of fields.
+ * <p>
+ * The Representation to operate on is set by
+ * {@link #setRepresentation(Representation)}. Queries always return an empty
+ * result.
+ * @author Rupert Westenthaler
+ *
+ */
+public class SingleRepresentationBackend extends AbstractBackend {
+
+    /** The Representation this backend operates on; <code>null</code> until set. */
+    Representation representation;
+    private final ValueFactory valueFactory;
+
+    /** Query instance returned by {@link #createQuery()}. */
+    private static final FieldQuery DUMMY_QUERY = new FieldQueryImpl();
+    /** Empty result returned for every call to {@link #query(FieldQuery)}. */
+    private static final QueryResultList<String> EMPTY_RESULT =
+        new QueryResultListImpl<String>(DUMMY_QUERY, Collections.<String>emptyList(), String.class);
+
+    /**
+     * Creates a backend using the {@link InMemoryValueFactory}.
+     */
+    public SingleRepresentationBackend() {
+        this(null);
+    }
+    /**
+     * Creates a backend using the parsed {@link ValueFactory}.
+     * @param vf the value factory or <code>null</code> to use the
+     * {@link InMemoryValueFactory}
+     */
+    public SingleRepresentationBackend(ValueFactory vf){
+        this.valueFactory = vf != null ? vf : InMemoryValueFactory.getInstance();
+    }
+    /**
+     * Sets the Representation this backend operates on.
+     * @param r the Representation
+     * @throws IllegalArgumentException if <code>null</code> is parsed
+     */
+    public void setRepresentation(Representation r){
+        if(r == null){
+            throw new IllegalArgumentException("The parsed Representation MUST NOT be NULL!");
+        }
+        this.representation = r;
+    }
+
+    @Override
+    protected ValueFactory getValueFactory() {
+        return valueFactory;
+    }
+
+    /**
+     * Returns the current Representation if its id equals the parsed id.
+     * @return the Representation or <code>null</code> if the id does not
+     * match or no Representation was set so far
+     */
+    @Override
+    protected Representation getRepresentation(String id) throws EntityhubException {
+        final Representation current = representation;
+        //no Representation set: answer "not found" rather than failing with an NPE
+        if(current == null || !current.getId().equals(id)){
+            return null;
+        }
+        return current;
+    }
+
+    /**
+     * Always returns the same empty result, regardless of the parsed query.
+     */
+    @Override
+    protected QueryResultList<String> query(FieldQuery query) throws EntityhubException {
+        return EMPTY_RESULT;
+    }
+
+    /**
+     * Always returns the same shared query instance.
+     */
+    @Override
+    protected FieldQuery createQuery() {
+        return DUMMY_QUERY;
+    }
+
+}
Propchange: incubator/stanbol/trunk/entityhub/ldpath/src/main/java/org/apache/stanbol/entityhub/ldpath/backend/SingleRepresentationBackend.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: incubator/stanbol/trunk/entityhub/ldpath/src/test/java/org/apache/stanbol/entityhub/ldpath/backend/BackendTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/ldpath/src/test/java/org/apache/stanbol/entityhub/ldpath/backend/BackendTest.java?rev=1306251&r1=1306250&r2=1306251&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/ldpath/src/test/java/org/apache/stanbol/entityhub/ldpath/backend/BackendTest.java (original)
+++ incubator/stanbol/trunk/entityhub/ldpath/src/test/java/org/apache/stanbol/entityhub/ldpath/backend/BackendTest.java Wed Mar 28 11:02:23 2012
@@ -18,6 +18,8 @@ package org.apache.stanbol.entityhub.ldp
import static org.apache.stanbol.entityhub.ldpath.LDPathUtils.getReader;
import static org.junit.Assert.assertNotNull;
+
+import java.io.StringReader;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
@@ -25,6 +27,8 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import org.apache.stanbol.entityhub.ldpath.impl.LDPathTestBase;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.junit.Assert;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -168,4 +172,30 @@ public class BackendTest extends LDPathT
log.info("Assert LDPath Result for {}:", EXPECTED_HARVARD_ALUMNI);
assertLDPathResult(result,EXPECTED_HARVARD_ALUMNI);
}
+    /**
+     * Executes LDPath programs on a {@link SingleRepresentationBackend} holding
+     * the Representation of Paris: a type filter matching the entity is
+     * expected to provide values for the mapped field, a type filter that does
+     * not match is expected to provide none.
+     */
+    @Test
+    public void testSingleRepresentationBackend() throws Exception {
+        Representation paris = yard.getRepresentation("http://dbpedia.org/resource/Paris");
+        assertNotNull(paris);
+        SingleRepresentationBackend singleBackend = new SingleRepresentationBackend();
+        singleBackend.setRepresentation(paris);
+        LDPath<Object> ldPath = new LDPath<Object>(singleBackend);
+        Object context = yard.getValueFactory().createReference(paris.getId());
+
+        //type filter that matches the Representation
+        String matching = "myTest = .[rdf:type is <http://dbpedia.org/ontology/Place>]/rdfs:label;";
+        Program<Object> program = ldPath.parseProgram(new StringReader(matching));
+        Map<String,Collection<?>> result = program.execute(singleBackend, context);
+        Assert.assertNotNull(result);
+        Assert.assertTrue(result.containsKey("myTest"));
+        Collection<?> values = result.get("myTest");
+        Assert.assertNotNull(values);
+        Assert.assertFalse(values.isEmpty());
+
+        //type filter that does NOT match the Representation
+        String notMatching = "myTest = .[rdf:type is <http://dbpedia.org/ontology/Place2>]/rdfs:label;";
+        program = ldPath.parseProgram(new StringReader(notMatching));
+        result = program.execute(singleBackend, context);
+        Assert.assertNotNull(result);
+        values = result.get("myTest");
+        Assert.assertTrue(values == null || values.isEmpty());
+
+    }
}