You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/03/03 22:17:48 UTC

svn commit: r1296705 - in /incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata: ConstantMapping.java Mapping.java OntologyMappings.java PropertyMapping.java ResourceMapping.java TypeMapping.java

Author: rwesten
Date: Sat Mar  3 21:17:48 2012
New Revision: 1296705

URL: http://svn.apache.org/viewvc?rev=1296705&view=rev
Log:
STANBOL-512: Added support for Tika metadata -> Ontology mappings


Added:
    incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ConstantMapping.java   (with props)
    incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/Mapping.java   (with props)
    incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/OntologyMappings.java   (with props)
    incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/PropertyMapping.java   (with props)
    incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ResourceMapping.java   (with props)
    incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/TypeMapping.java   (with props)

Added: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ConstantMapping.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ConstantMapping.java?rev=1296705&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ConstantMapping.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ConstantMapping.java Sat Mar  3 21:17:48 2012
@@ -0,0 +1,61 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.engines.tika.metadata;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.tika.metadata.Metadata;
+
+public class ConstantMapping extends Mapping{
+
+    
+    private Collection<Resource> values;
+
+    public ConstantMapping(UriRef ontProperty, Resource...values) {
+        super(ontProperty, null);
+        if(values == null || values.length < 1){
+            throw new IllegalArgumentException("The parsed values MUST NOT be NULL nor an empty array");
+        }
+        this.values = Arrays.asList(values);
+        if(this.values.contains(null)){
+            throw new IllegalArgumentException("The parsed values MUST NOT contain a NULL element " +
+            		"(parsed: "+this.values+")!");
+        }
+    }
+
+    @Override
+    public boolean apply(MGraph graph, NonLiteral subject, Metadata metadata) {
+        for(Resource value : values){
+            graph.add(new TripleImpl(subject, ontProperty, value));
+            mappingLogger.log(subject, ontProperty, null, value);
+        }
+        return true;
+    }
+
+    @Override
+    public Set<String> getMappedTikaProperties() {
+        return Collections.emptySet();
+    }
+}

Propchange: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ConstantMapping.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/Mapping.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/Mapping.java?rev=1296705&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/Mapping.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/Mapping.java Sat Mar  3 21:17:48 2012
@@ -0,0 +1,310 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.engines.tika.metadata;
+
+import static org.apache.tika.metadata.DublinCore.DATE;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.BNode;
+import org.apache.clerezza.rdf.core.InvalidLiteralTypeException;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NoConvertorException;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.PlainLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.TypedLiteral;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.impl.TypedLiteralImpl;
+import org.apache.clerezza.rdf.ontologies.RDFS;
+import org.apache.clerezza.rdf.ontologies.XSD;
+import org.apache.tika.metadata.DublinCore;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Used as value for Apache Tika {@link Metadata} mappings. Holds the
+ * ontology property as {@link UriRef} and optionally a Tika {@link Property}.
+ * Later can be used to parse the correct datatype for values contained in the
+ * {@link Metadata}
+ * 
+ * @author westei
+ *
+ */
+public abstract class Mapping {
+    
+    private final static Logger log = LoggerFactory.getLogger(Mapping.class);
+    private static final LiteralFactory lf = LiteralFactory.getInstance();
+
+    /**
+     * List with allowed DataTypes.<ul>
+     * <li> <code>null</code> is used for {@link PlainLiteral}s
+     * <li> {@link XSD} datatyoes are used for {@link TypedLiteral}s
+     * <li> {@link RDFS#Resource} is used for {@link NonLiteral} values. Note
+     * that only {@link UriRef} is supported, because for Tika {@link BNode}s
+     * do not make sense.
+     * </ul>
+     */
+    public static final Set<UriRef> ONT_TYPES;
+    /**
+     * Map with the same keys as contained in {@link #ONT_TYPES}. The values
+     * are the java types.
+     */
+    protected static final Map<UriRef,Class<?>> ONT_TYPE_MAP;
+    
+    static {
+        //use a linked HasSetMap to have the nice ordering (mainly for logging)
+        Map<UriRef,Class<?>> map = new LinkedHashMap<UriRef,Class<?>>();
+        //Plain Literal values
+        map.put(null,null);
+        //Typed Literal values
+        map.put(XSD.anyURI,URI.class);
+        map.put(XSD.base64Binary,new byte[]{}.getClass());
+        map.put(XSD.boolean_,Boolean.class);
+        map.put(XSD.byte_,Byte.class);
+        map.put(XSD.date,Date.class);
+        map.put(XSD.dateTime,Date.class);
+        map.put(XSD.decimal,BigDecimal.class);
+        map.put(XSD.double_,Double.class);
+        map.put(XSD.float_,Float.class);
+        map.put(XSD.int_,Integer.class);
+        map.put(XSD.integer,BigInteger.class);
+        map.put(XSD.long_,Long.class);
+        map.put(XSD.short_,Short.class);
+        map.put(XSD.string,String.class);
+        map.put(XSD.time,Date.class);
+        //Data Types for NonLiteral values
+        map.put(RDFS.Resource,URI.class);
+        ONT_TYPE_MAP = Collections.unmodifiableMap(map);
+        ONT_TYPES = ONT_TYPE_MAP.keySet();
+
+        //NOTE: The following XSD types are not included
+        //XSD.gDay,XSD.gMonth,XSD.gMonthDay,XSD.gYearMonth,XSD.hexBinary,XSD.language,
+        //XSD.Name,XSD.NCName,XSD.negativeInteger,XSD.NMTOKEN,XSD.nonNegativeInteger,
+        //XSD.normalizedString,XSD.positiveInteger,
+        //XSD.token,XSD.unsignedByte,XSD.unsignedInt,XSD.unsignedLong,XSD.unsignedShort,
+    }
+    
+    protected final UriRef ontProperty;
+    
+    protected final Converter converter;
+    /**
+     * Getter for the OntologyProperty for this mapping
+     * @return the ontProperty
+     */
+    public final UriRef getOntologyProperty() {
+        return ontProperty;
+    }
+    /**
+     * Getter for the set of Tika {@link Metadata} key names that are used
+     * by this mapping. This is typically used to determine if based on the 
+     * present {@link Metadata#names()} a mapping need to be processed or not.
+     * <p>Mappings need to be called if any of the returned keys is present in
+     * the {@link Metadata}. Mappings that return an empty list MUST BE
+     * called.
+     * @return the Tika {@link Metadata} key names that are used by this mapping.
+     * If no keys are mapped than it MUST return an empty list.
+     */
+    public abstract Set<String> getMappedTikaProperties();
+    
+    protected final UriRef ontType;
+    
+    protected Mapping(UriRef ontProperty,UriRef ontType){
+        this(ontProperty,ontType,null);
+    }
+    protected Mapping(UriRef ontProperty,UriRef ontType,Converter converter){
+        if(ontProperty == null){
+            throw new IllegalArgumentException("The parsed ontology property MUST NOT be NULL!");
+        }
+        this.ontProperty = ontProperty;
+        if(!ONT_TYPES.contains(ontType)){
+            throw new IllegalArgumentException("The ontology type '"+ontType
+                + "' is not supported. (supported: "+ONT_TYPES+")");
+        }
+        this.ontType = ontType;
+        this.converter = converter;
+    }
+    
+    /**
+     * Applies this mapping based on the parsed {@link Metadata} and stores the 
+     * results to {@link MGraph}
+     * @param graph the Graph to store the mapping results
+     * @param subject the subject (context) to add the mappings
+     * @param metadata the metadata used for applying the mapping
+     * @return <code>true</code> if the mapping could be applied based on the
+     * parsed data. Otherwise <code>false</code>. This is intended to be used
+     * by components that need to check if required mappings could be applied.
+     */
+    public abstract boolean apply(MGraph graph, NonLiteral subject, Metadata metadata);
+    /**
+     * Converts the parsed value based on the mapping information to an RDF
+     * {@link Resource}. Optionally supports also validation if the parsed
+     * value is valid for the {@link Mapping#ontType ontology type} specified by
+     * the parsed mapping.
+     * @param value the value
+     * @param mapping the mapping
+     * @param validate 
+     * @return the {@link Resource} or <code>null</code> if the parsed value is
+     * <code>null</code> or {@link String#isEmpty() empty}.
+     * @throws IllegalArgumentException if the parsed {@link Mapping} is 
+     * <code>null</code>
+     */
+    protected Resource toResource(String value, boolean validate){
+        Metadata dummy = null;//used for date validation
+        if(value == null || value.isEmpty()){
+            return null; //ignore null and empty values
+        }
+        Resource object;
+        if(ontType == null){
+            object = new PlainLiteralImpl(value);
+        } else if(ontType == RDFS.Resource){
+            try {
+                if(validate){
+                    new URI(value);
+                }
+                object = new UriRef(value);
+            } catch (URISyntaxException e) {
+                log.warn("Unable to create Reference for value {} (not a valid URI)" +
+                        " -> create a literal instead",value);
+                object = new PlainLiteralImpl(value);
+            }
+        } else { //typed literal
+            Class<?> clazz = Mapping.ONT_TYPE_MAP.get(ontType);
+            if(clazz.equals(Date.class)){ //special handling for dates :(
+                //Dates are special, because Clerezza requires W3C date format
+                //and Tika uses the iso8601 variants.
+                //Because of that here is Tika used to get the Date object for
+                //the parsed value and than the LiteralFactory of Clerezza to
+                //create the TypedLiteral.
+                //Note that because of that no validation is required for
+                //Dates.
+                
+                //Need a dummy metadata object to get access to the private
+                //parseDate(..) method
+                if(dummy == null) { 
+                    dummy = new Metadata();
+                }
+                //any Property with the Date type could be used here
+                dummy.add(DATE.getName(), value);
+                Date date = dummy.getDate(DublinCore.DATE); //access parseDate(..)
+                if(date != null){ //now use the Clerezza Literal factory
+                    object = lf.createTypedLiteral(date);
+                } else { //fall back to xsd:string
+                    object = new TypedLiteralImpl(value, XSD.string);
+                }
+            } else {
+                object = new TypedLiteralImpl(value, ontType);
+            }
+            if(validate && clazz != null && 
+                    !clazz.equals(Date.class)){ //we need not to validate dates
+                try {
+                    lf.createObject(clazz,(TypedLiteral)object);
+                } catch (NoConvertorException e) {
+                    log.info("Unable to validate typed literals of type {} because" +
+                            "there is no converter for Class {} registered with Clerezza",
+                            ontType,clazz);
+                } catch (InvalidLiteralTypeException e) {
+                    log.info("The value '{}' is not valid for dataType {}!" +
+                            "create literal with type 'xsd:string' instead",
+                            value,ontType);
+                    object = new TypedLiteralImpl(value, XSD.string);
+                }
+            } //else no validation needed
+        }
+        if(converter != null){
+            object = converter.convert(object);
+        }
+        return object;
+    }
+    /**
+     * Used by subclasses to log mapped information
+     */
+    protected final static MappingLogger mappingLogger = new MappingLogger();
+    /**
+     * Allows nicely formatted logging of mapped properties
+     * @author Rupert Westenthaler
+     *
+     */
+    protected static class MappingLogger{
+        
+        private List<NonLiteral> subjects = new ArrayList<NonLiteral>();
+        private UriRef predicate;
+        private final int intendSize = 2;
+        private final char[] intnedArray;
+        private static final int MAX_INTEND = 5;
+        
+        private MappingLogger(){
+            intnedArray = new char[MAX_INTEND*intendSize];
+            Arrays.fill(intnedArray, ' ');
+        }
+        private String getIntend(int intend){
+            return String.copyValueOf(intnedArray, 0, 
+                Math.min(MAX_INTEND, intend)*intendSize);
+        }
+        
+        protected void log(NonLiteral subject,UriRef predicate, String prop, Resource object){
+            if(!log.isDebugEnabled()){
+                return;
+            }
+            int intendCount = subjects.indexOf(subject)+1;
+            final String intend;
+            if(intendCount < 1){
+                subjects.add(subject);
+                intendCount = subjects.size();
+                intend = getIntend(intendCount);
+                log.debug("{}context: {}",intend,subject);
+            } else if(intendCount < subjects.size()){
+                for(int i = intendCount;i<subjects.size();i++){
+                    subjects.remove(i);
+                }
+                intend = getIntend(intendCount);
+            } else {
+                intend = getIntend(intendCount);
+            }
+            if(!predicate.equals(this.predicate)){
+                log.debug("{}  {}",intend,predicate);
+            }
+            log.debug("{}    {} {}",new Object[]{
+                intend,object,prop != null ? ("(from: '"+prop+')') : ""
+            });
+        }
+    }
+    
+    public static interface Converter {
+        Resource convert(Resource value);
+    }
+}
\ No newline at end of file

Propchange: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/Mapping.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/OntologyMappings.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/OntologyMappings.java?rev=1296705&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/OntologyMappings.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/OntologyMappings.java Sat Mar  3 21:17:48 2012
@@ -0,0 +1,505 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.engines.tika.metadata;
+
+import static java.util.Collections.disjoint;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.TypedLiteral;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.ontologies.DC;
+import org.apache.clerezza.rdf.ontologies.OWL;
+import org.apache.clerezza.rdf.ontologies.RDFS;
+import org.apache.clerezza.rdf.ontologies.SKOS;
+import org.apache.clerezza.rdf.ontologies.XSD;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
+import org.apache.tika.metadata.CreativeCommons;
+import org.apache.tika.metadata.DublinCore;
+import org.apache.tika.metadata.Geographic;
+import org.apache.tika.metadata.HttpHeaders;
+import org.apache.tika.metadata.MSOffice;
+import org.apache.tika.metadata.Message;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TIFF;
+import org.apache.tika.metadata.XMPDM;
+
+/**
+ * Defines mappings for keys used by Apache Tika in the {@link Metadata} to
+ * ontology properties.<p>
+ * 
+ * @author Rupert Westenthaler
+ *
+ */
+public class OntologyMappings implements Iterable<Mapping>{
+    
+    private static OntologyMappings defaultMappings;
+    
+    private final Map<UriRef,Collection<Mapping>> mappings = new HashMap<UriRef,Collection<Mapping>>();
+    /**
+     * Used to protect the default mappings from modifications
+     */
+    private boolean readonly = false;
+    /**
+     * The media ontology namespace
+     */
+    private static String ma = "http://www.w3.org/ns/ma-ont#";
+    
+    public static OntologyMappings getDefaultMappings(){
+        if(defaultMappings == null){
+            defaultMappings = new OntologyMappings();
+            //TODO: validate the defaults
+            addMediaResourceOntologyMappings(defaultMappings);
+            addNepomukMessageMappings(defaultMappings);
+            addRdfsMappings(defaultMappings);
+        }
+        return defaultMappings;
+    }
+    
+    /**
+     * @param mappings
+     */
+    public static void addNepomukMessageMappings(OntologyMappings mappings) {
+        String nmo = "http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#";
+        mappings.addMappings( 
+            new PropertyMapping(nmo+"bbc",Message.MESSAGE_BCC));
+        mappings.addMappings( 
+            new PropertyMapping(nmo+"cc",Message.MESSAGE_CC));
+        mappings.addMappings( 
+            new PropertyMapping(nmo+"from",Message.MESSAGE_FROM));
+        mappings.addMappings( 
+            new PropertyMapping(nmo+"to",Message.MESSAGE_TO));
+    }
+
+    /**
+     * @param mappings
+     */
+    public static void addGeoMappings(OntologyMappings mappings) {
+        mappings.addMappings(
+            new PropertyMapping(NamespaceEnum.geo+"alt",XSD.double_,Geographic.ALTITUDE.getName()));
+        mappings.addMappings(
+            new PropertyMapping(NamespaceEnum.geo+"lat",XSD.double_,Geographic.LATITUDE.getName()));
+        mappings.addMappings(
+            new PropertyMapping(NamespaceEnum.geo+"long",XSD.double_,Geographic.LONGITUDE.getName()));
+    }
+
+    /**
+     * Maps the {@link TIFF} metadata to the Nepomuk EXIF ontology. This uses
+     * the more preceise datatyped as defined by {@link TIFF} rather than
+     * xsd:string as defined for most of the properites within the Nepomuk
+     * ontology.
+     * @param mappings
+     */
+    public static void addNepomukExifMappings(OntologyMappings mappings) {
+        String exif = "http://www.semanticdesktop.org/ontologies/2007/05/10/nexif#";
+        mappings.addMappings( 
+            new PropertyMapping(exif+"bitsPerSample",XSD.int_,TIFF.BITS_PER_SAMPLE.getName()));
+        mappings.addMappings(
+            new PropertyMapping(exif+"make",TIFF.EQUIPMENT_MAKE.getName()));
+        mappings.addMappings(
+            new PropertyMapping(exif+"model",TIFF.EQUIPMENT_MODEL.getName()));
+        mappings.addMappings( 
+            new PropertyMapping(exif+"exposureTime",XSD.double_,TIFF.EXPOSURE_TIME.getName()));
+        mappings.addMappings( 
+            new PropertyMapping(exif+"fNumber",XSD.double_,TIFF.F_NUMBER.getName()));
+        mappings.addMappings( 
+            new PropertyMapping(exif+"flash",XSD.boolean_,TIFF.FLASH_FIRED.getName()));
+        mappings.addMappings( 
+            new PropertyMapping(exif+"focalLength",XSD.double_,TIFF.FOCAL_LENGTH.getName()));
+        mappings.addMappings(
+            new PropertyMapping(exif+"relatedImageLength",XSD.int_,TIFF.IMAGE_LENGTH.getName()));
+        mappings.addMappings(
+            new PropertyMapping(exif+"relatedImageWidth",XSD.int_,TIFF.IMAGE_WIDTH.getName()));
+        mappings.addMappings( 
+            new PropertyMapping(exif+"isoSpeedRatings",XSD.int_,TIFF.ISO_SPEED_RATINGS.getName()));
+        mappings.addMappings(
+            new PropertyMapping(exif+"orientation",XSD.string,TIFF.ORIENTATION.getName()));
+        mappings.addMappings( 
+            new PropertyMapping(exif+"dateTimeOriginal",XSD.dateTime,TIFF.ORIGINAL_DATE.getName()));
+        mappings.addMappings(
+            new PropertyMapping(exif+"xResolution",XSD.double_,TIFF.RESOLUTION_HORIZONTAL.getName()));
+        mappings.addMappings(
+            new PropertyMapping(exif+"resolutionUnit",XSD.string,TIFF.RESOLUTION_UNIT.getName()));
+        mappings.addMappings(
+            new PropertyMapping(exif+"yResolution",XSD.double_,TIFF.RESOLUTION_VERTICAL.getName()));
+        mappings.addMappings(
+            new PropertyMapping(exif+"samplesPerPixel",XSD.int_,TIFF.SAMPLES_PER_PIXEL.getName()));
+        mappings.addMappings(
+            new PropertyMapping(exif+"software",TIFF.SOFTWARE.getName()));
+    }
+
+    /**
+     * Adds the Mappings for {@link DublinCore}<p>
+     * Two mappings are added for each property<ul>
+     * <li> <a href="http://dublincore.org/documents/dcmi-terms/">Dublin Core Terms</a>
+     * <li> The <a href="http://www.w3.org/TR/mediaont-10/#dc-table">DC terms 
+     * mappings</a> of the Media Annotation Ontology.
+     * </ul>
+     * @param mappings The ontology mappings to add the DC mappings
+     */
+    public static void addDcMappings(OntologyMappings mappings) {
+        
+        mappings.addMapping(
+            new PropertyMapping(DC.contributor,
+                DublinCore.CONTRIBUTOR,MSOffice.LAST_AUTHOR));
+        mappings.addMapping(
+            new PropertyMapping(DC.coverage,DublinCore.COVERAGE));
+        mappings.addMappings(
+            new PropertyMapping(DC.creator,
+                DublinCore.CREATOR,MSOffice.AUTHOR,"initial-creator"));
+        mappings.addMappings( 
+            new PropertyMapping(DC.description,DublinCore.DESCRIPTION));
+        mappings.addMappings( 
+            new PropertyMapping(DC.format,
+                DublinCore.FORMAT,HttpHeaders.CONTENT_TYPE));
+        mappings.addMappings( 
+            new PropertyMapping(DC.identifier,DublinCore.IDENTIFIER));
+        mappings.addMappings(
+            new PropertyMapping(DC.language,
+                DublinCore.LANGUAGE,HttpHeaders.CONTENT_LANGUAGE));
+        mappings.addMappings(
+            new PropertyMapping(NamespaceEnum.dc+"modified",XSD.dateTime,
+                DublinCore.MODIFIED,"Last-Modified"));
+        mappings.addMappings( 
+            new PropertyMapping(DC.publisher,
+                DublinCore.PUBLISHER,MSOffice.COMPANY));
+        mappings.addMappings( 
+            new PropertyMapping(DC.relation,DublinCore.RELATION));
+        mappings.addMappings(
+            new PropertyMapping(DC.rights,DublinCore.RIGHTS));
+        mappings.addMappings( 
+            new PropertyMapping(DC.source,DublinCore.SOURCE));
+        mappings.addMappings( 
+            new PropertyMapping(DC.subject,
+                DublinCore.SUBJECT,MSOffice.KEYWORDS));
+        mappings.addMappings( 
+            new PropertyMapping(DC.title,DublinCore.TITLE));
+        mappings.addMappings( 
+            new PropertyMapping(DC.type,DublinCore.TYPE));
+        mappings.addMappings( 
+            new PropertyMapping(DC.date,XSD.dateTime,DublinCore.DATE.getName()));
+        //MS Office -> DC
+        mappings.addMappings(
+            new PropertyMapping(NamespaceEnum.dc+"created",XSD.dateTime,
+                MSOffice.CREATION_DATE.getName(),"created"));
+        
+    }
+    public static void addMediaResourceOntologyMappings(OntologyMappings mappings){
+        mappings.addMappings(
+            new PropertyMapping(ma+"hasContributor",
+                DublinCore.CONTRIBUTOR,XMPDM.ARTIST.getName(),XMPDM.COMPOSER.getName()));
+        mappings.addMapping( 
+            new ResourceMapping(ma+"hasLocation",
+                new TypeMapping(ma+"Location"),
+                new PropertyMapping(ma+"locationName",DublinCore.COVERAGE)));
+        mappings.addMappings( 
+            new PropertyMapping(ma+"hasCreator",
+                DublinCore.CREATOR,MSOffice.AUTHOR,"initial-creator"));
+        mappings.addMappings( 
+            new PropertyMapping(ma+"description",DublinCore.DESCRIPTION));
+        mappings.addMappings( 
+            new PropertyMapping(ma+"hasFormat",
+                DublinCore.FORMAT,HttpHeaders.CONTENT_TYPE));
+        /*
+         * Excerpt of the MA recommendation:
+         *   The identifier of a media resource is represented in RDF by the URI 
+         *   of the node representing that media resource. If a resource is 
+         *   identified by several URI, owl:sameAs should be used.
+         */
+        mappings.addMappings( 
+            new PropertyMapping(OWL.sameAs,RDFS.Resource,DublinCore.IDENTIFIER));
+        mappings.addMappings( 
+            new PropertyMapping(ma+"hasLanguage",
+                DublinCore.LANGUAGE,HttpHeaders.CONTENT_LANGUAGE));
+        mappings.addMappings( 
+            new PropertyMapping(ma+"editDate",XSD.dateTime,
+                DublinCore.MODIFIED,MSOffice.LAST_SAVED.getName()));
+        mappings.addMappings(
+            new PropertyMapping(ma+"hasPublisher",DublinCore.PUBLISHER));
+        mappings.addMappings( 
+            new PropertyMapping(ma+"hasRelatedResource",DublinCore.RELATION));
+        mappings.addMappings( 
+            new PropertyMapping(ma+"copyright",RDFS.Resource,
+                //DC:rights and cc:license
+                DublinCore.RIGHTS,CreativeCommons.LICENSE_LOCATION, CreativeCommons.LICENSE_URL,
+                XMPDM.COPYRIGHT.getName()));
+        mappings.addMappings( 
+            new PropertyMapping(ma+"isMemberOf",DublinCore.SOURCE));
+        mappings.addMappings( 
+            new PropertyMapping(ma+"hasKeyword",
+                DublinCore.SUBJECT,MSOffice.KEYWORDS));
+        mappings.addMappings( 
+            new PropertyMapping(ma+"title",
+                DublinCore.TITLE,XMPDM.SCENE.getName(),XMPDM.TAPE_NAME.getName(),
+                XMPDM.SHOT_NAME.getName()));
+        mappings.addMapping(
+            new PropertyMapping(ma+"alternativeTitle", XMPDM.ALT_TAPE_NAME.getName()));
+        mappings.addMapping(
+            new PropertyMapping(ma+"mainOriginalTitle", XMPDM.ALBUM.getName()));
+        mappings.addMappings( 
+            new PropertyMapping(ma+"hasGenre",
+                DublinCore.TYPE,XMPDM.GENRE.getName()));
+        mappings.addMappings(
+            new PropertyMapping(ma+"creationDate",XSD.dateTime,
+                DublinCore.DATE.getName(),MSOffice.CREATION_DATE.getName(),"created"));
+        mappings.addMapping(
+            new PropertyMapping(ma+"description", 
+                DublinCore.DESCRIPTION,MSOffice.COMMENTS));
+        
+        mappings.addMappings( 
+            new PropertyMapping(ma+"hasContributor",
+                MSOffice.LAST_AUTHOR,MSOffice.AUTHOR,XMPDM.ENGINEER.getName()));
+
+        
+        //other properties -> Media Ontology
+        mappings.addMappings(
+            new PropertyMapping(ma+"hasCreator","producer","initial-creator"));
+
+        //EXIF -> Media Ontology
+        mappings.addMappings(
+            new PropertyMapping(ma+"frameHeight",XSD.int_,TIFF.IMAGE_LENGTH.getName()));
+        mappings.addMappings(
+            new PropertyMapping(ma+"frameWidth",XSD.int_,TIFF.IMAGE_WIDTH.getName()));
+        mappings.addMappings( 
+            new PropertyMapping(ma+"creationDate",XSD.dateTime,
+                TIFF.ORIGINAL_DATE.getName(),XMPDM.SHOT_DATE.getName()));
+
+        //XMP -> Media Ontology
+        //here we need to split up the metadata for the audio and video
+        mappings.addMapping(
+            new PropertyMapping(ma+"releaseDate", XSD.dateTime,XMPDM.RELEASE_DATE.getName()));
+        mappings.addMapping(new ResourceMapping(ma+"hasTrack", 
+            new Mapping[]{ /* no required */},
+            new Mapping[]{//optional
+                new PropertyMapping(ma+"hasFormat",XSD.string,XMPDM.AUDIO_CHANNEL_TYPE.getName()),
+                new PropertyMapping(ma+"hasCompression",XSD.string,XMPDM.AUDIO_COMPRESSOR.getName()),
+                new PropertyMapping(ma+"editDate", XMPDM.AUDIO_MOD_DATE.getName()),
+                new PropertyMapping(ma+"samplingRate", XSD.int_,XMPDM.AUDIO_SAMPLE_RATE.getName())
+            }, new Mapping[]{
+                new TypeMapping(ma+"MediaFragment"),
+                new TypeMapping(ma+"Track"),
+                new TypeMapping(ma+"AudioTrack"),
+            }
+            ));
+        mappings.addMapping(new ResourceMapping(ma+"hasTrack", 
+            new Mapping[]{ /* no required */},
+            new Mapping[]{//optional
+                new PropertyMapping(ma+"hasCompression",XSD.string,XMPDM.VIDEO_COMPRESSOR.getName()),
+                new PropertyMapping(ma+"editDate", XMPDM.VIDEO_MOD_DATE.getName()),
+                new PropertyMapping(ma+"frameRate", XSD.double_,XMPDM.VIDEO_FRAME_RATE.getName())
+            },
+            new Mapping[]{ //additioanl
+                new TypeMapping(ma+"MediaFragment"),
+                new TypeMapping(ma+"Track"),
+                new TypeMapping(ma+"VideoTrack"),
+                new PropertyMapping(ma+"frameHeight",XSD.int_,TIFF.IMAGE_LENGTH.getName()),
+                new PropertyMapping(ma+"frameWidth",XSD.int_,TIFF.IMAGE_WIDTH.getName())
+            }));
+        mappings.addMapping(
+            new PropertyMapping(ma+"numberOfTracks",XSD.int_,XMPDM.TRACK_NUMBER.getName()));
+        mappings.addMapping(
+            new PropertyMapping(ma+"averageBitRate",XSD.double_,
+                new Mapping.Converter(){//we need to convert from MByte/min to kByte/sec
+                    @Override
+                    public Resource convert(Resource value) {
+                        if(value instanceof TypedLiteral &&
+                                XSD.double_.equals(((TypedLiteral)value).getDataType())){
+                            LiteralFactory lf = LiteralFactory.getInstance();
+                            double mm = lf.createObject(Double.class, (TypedLiteral)value);
+                            return lf.createTypedLiteral(Double.valueOf(
+                                mm*1024/60));
+                        } else {
+                            return value; //do not convert
+                        }
+                    }
+                
+            },XMPDM.FILE_DATA_RATE.getName()));
+
+        //GEO -> Media Resource Ontology
+        mappings.addMapping(new ResourceMapping(ma+"hasLocation", 
+            new Mapping[]{ //required
+                new PropertyMapping(ma+"locationLatitude", XSD.double_,Geographic.LATITUDE.getName()),
+                new PropertyMapping(ma+"locationLongitude", XSD.double_,Geographic.LONGITUDE.getName())          
+            },new Mapping[]{ //optional
+                new PropertyMapping(ma+"locationAltitude", XSD.double_,Geographic.ALTITUDE.getName())          
+            },new Mapping[]{//additional
+                new TypeMapping(ma+"Location")
+            }));
+    }
+//TODO
+//    public static void addNepomukId3Mappings(OntologyMappings mappings){
+//        XMPDM.ABS_PEAK_AUDIO_FILE_PATH;
+//        XMPDM.ALBUM;
+//        XMPDM.ALT_TAPE_NAME;
+//        XMPDM.ARTIST;
+//        XMPDM.AUDIO_CHANNEL_TYPE;
+//        XMPDM.AUDIO_COMPRESSOR;
+//        XMPDM.AUDIO_MOD_DATE;
+//        XMPDM.AUDIO_SAMPLE_RATE;
+//        XMPDM.AUDIO_SAMPLE_TYPE;
+//        XMPDM.COMPOSER;
+//        XMPDM.COPYRIGHT;
+//        XMPDM.ENGINEER;
+//        XMPDM.FILE_DATA_RATE;
+//        XMPDM.GENRE;
+//        XMPDM.INSTRUMENT;
+//        XMPDM.KEY;
+//        XMPDM.LOG_COMMENT;
+//        XMPDM.LOOP;
+//        XMPDM.METADATA_MOD_DATE;
+//        XMPDM.NUMBER_OF_BEATS;
+//        XMPDM.PULL_DOWN;
+//        XMPDM.RELATIVE_PEAK_AUDIO_FILE_PATH;
+//        XMPDM.RELEASE_DATE;
+//        XMPDM.SCALE_TYPE;
+//        XMPDM.SCENE;
+//        XMPDM.SHOT_DATE;
+//        XMPDM.SHOT_LOCATION;
+//        XMPDM.SHOT_NAME;
+//        XMPDM.SPEAKER_PLACEMENT;
+//        XMPDM.STRETCH_MODE;
+//        XMPDM.TAPE_NAME;
+//        XMPDM.TEMPO;
+//        XMPDM.TIME_SIGNATURE;
+//        XMPDM.TRACK_NUMBER;
+//        XMPDM.VIDEO_ALPHA_MODE;
+//        XMPDM.VIDEO_ALPHA_UNITY_IS_TRANSPARENT;
+//        XMPDM.VIDEO_COLOR_SPACE;
+//        XMPDM.VIDEO_COMPRESSOR;
+//        XMPDM.VIDEO_FIELD_ORDER;
+//        XMPDM.VIDEO_FRAME_RATE;
+//        XMPDM.VIDEO_MOD_DATE;
+//        XMPDM.VIDEO_PIXEL_ASPECT_RATIO;
+//        XMPDM.VIDEO_PIXEL_DEPTH;
+//    }
+    public static void addSkosMappings(OntologyMappings mappings){
+        //DC -> SKOS
+        mappings.addMappings( 
+            new PropertyMapping(SKOS.prefLabel,
+                DublinCore.TITLE));
+        mappings.addMappings( 
+            new PropertyMapping(SKOS.definition,
+                DublinCore.DESCRIPTION));
+        mappings.addMappings(
+            new PropertyMapping(SKOS.notation,
+                DublinCore.IDENTIFIER));
+        //MS Office -> SKOS
+        mappings.addMappings( 
+            new PropertyMapping(SKOS.note,MSOffice.COMMENTS));
+        mappings.addMappings( 
+            new PropertyMapping(SKOS.editorialNote,
+                MSOffice.NOTES,XMPDM.LOG_COMMENT.getName()));
+    }
+    
+    public static void addRdfsMappings(OntologyMappings mappings){
+        //DC
+        mappings.addMappings( 
+            new PropertyMapping(RDFS.label,DublinCore.TITLE));
+        mappings.addMappings( 
+            new PropertyMapping(RDFS.comment,DublinCore.DESCRIPTION,MSOffice.COMMENTS));
+    }
+    
    /**
     * Maps {@link CreativeCommons#LICENSE_URL} and 
     * {@link CreativeCommons#LICENSE_LOCATION} to cc:license. The values
     * are mapped with {@link RDFS#Resource} as type so that valid URIs are
     * added as resources rather than literals.
     * @param mappings the {@link OntologyMappings} instance to add the
     * mapping to
     */
    public static void addCreativeCommonsMappings(OntologyMappings mappings){
        mappings.addMapping( 
            new PropertyMapping("http://creativecommons.org/ns#license",RDFS.Resource,
                CreativeCommons.LICENSE_URL,CreativeCommons.LICENSE_LOCATION));

    }
+    
+    
+    public void addMappings(Mapping...mappings){
+        if(mappings == null || mappings.length > 1){
+            return; //nothing to do
+        }
+        for(Mapping m : mappings){
+            addMapping(m);
+        }
+    }
+    public void addMapping(Mapping mapping){
+        if(readonly){
+            throw new IllegalStateException("This "+getClass().getSimpleName()+" instance is read only!");
+        }
+        if(mapping == null){
+            return; //nothing to do
+        }
+        Collection<Mapping> propMappings = this.mappings.get(mapping.getOntologyProperty());
+        if(propMappings == null){
+            propMappings = new HashSet<Mapping>();
+            this.mappings.put(mapping.getOntologyProperty(), propMappings);
+        }
+        propMappings.add(mapping);
+    }
+    public void removePropertyMappings(UriRef property){
+        if(readonly){
+            throw new IllegalStateException("This "+getClass().getSimpleName()+" instance is read only!");
+        }
+        this.mappings.remove(property);
+    }
+    
+    public void apply(MGraph graph, UriRef context, Metadata metadata){
+        Set<String> keys = new HashSet<String>(Arrays.asList(metadata.names()));
+        for(Mapping mapping : this){
+            if(mapping.getMappedTikaProperties().isEmpty() ||
+                    !disjoint(keys, mapping.getMappedTikaProperties())){
+                mapping.apply(graph, context, metadata);
+            }
+        }
+    }
+    @Override
+    public Iterator<Mapping> iterator() {
+        return new Iterator<Mapping>() {
+            Iterator<Collection<Mapping>> mappingsIt = OntologyMappings.this.mappings.values().iterator();
+            Iterator<Mapping> mappingIt = Collections.EMPTY_LIST.iterator();
+            @Override
+            public boolean hasNext() {
+                //assumes no empty lists as values of OntologyMappings.this.mappings
+                return mappingIt.hasNext() || mappingsIt.hasNext();
+            }
+
+            @Override
+            public Mapping next() {
+                //assumes no empty lists as values of OntologyMappings.this.mappings
+                if(!mappingIt.hasNext()){
+                    mappingIt = mappingsIt.next().iterator();
+                }
+                return mappingIt.next();
+            }
+
+            @Override
+            public void remove() {
+                throw new UnsupportedOperationException("Removal not Supported!");
+            }
+            
+        };
+    }
+
+}

Propchange: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/OntologyMappings.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/PropertyMapping.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/PropertyMapping.java?rev=1296705&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/PropertyMapping.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/PropertyMapping.java Sat Mar  3 21:17:48 2012
@@ -0,0 +1,102 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.engines.tika.metadata;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.tika.metadata.Metadata;
+
+public final class PropertyMapping extends Mapping {
+    
+    /**
+     * A Set with the mapped properties
+     */
+    protected final Set<String> tikaProperties;
+
+    public PropertyMapping(String ontProperty, UriRef ontType,String...tikaProperties) {
+        this(ontProperty == null? null : new UriRef(ontProperty), ontType,tikaProperties);
+    }
+    public PropertyMapping(String ontProperty, UriRef ontType,Converter converter,String...tikaProperties) {
+        this(ontProperty == null? null : new UriRef(ontProperty), ontType,converter,tikaProperties);
+    }
+
+    public PropertyMapping(String ontProperty,String...tikaProperties) {
+        this(ontProperty == null? null : new UriRef(ontProperty),null,tikaProperties);
+    }
+
+    public PropertyMapping(UriRef ontProperty,String...tikaProperties) {
+        this(ontProperty,null,tikaProperties);
+    }
+    public PropertyMapping(UriRef ontProperty, UriRef ontType,String...tikaProperties) {
+        this(ontProperty,ontType,null,tikaProperties);
+    }
+    public PropertyMapping(UriRef ontProperty, UriRef ontType,Converter converter,String...tikaProperties) {
+        super(ontProperty, ontType,converter);
+        if(tikaProperties == null || tikaProperties.length < 1){
+            throw new IllegalArgumentException("The list of parsed Tika properties MUST NOT be NULL nor empty!");
+        }
+        this.tikaProperties = Collections.unmodifiableSet(new HashSet<String>(
+                Arrays.asList(tikaProperties)));
+        if(this.tikaProperties.contains(null) || this.tikaProperties.contains("")){
+            throw new IllegalArgumentException("Teh parsed list of Tika properties MUST NOT " +
+            		"contain NULL or empty members (parsed: "+Arrays.toString(tikaProperties)+")!");
+        }
+    }
+
+    @Override
+    public boolean apply(MGraph graph, NonLiteral subject, Metadata metadata) {
+        Set<Resource> values = new HashSet<Resource>();
+        for(String tikaProperty : tikaProperties){
+            String[] tikaPropValues = metadata.getValues(tikaProperty);
+            if(tikaPropValues != null && tikaPropValues.length > 0){
+                for(String tikaPropValue : tikaPropValues){
+                    Resource resource = toResource(tikaPropValue, true);
+                    if(resource != null){
+                        values.add(resource);
+                        mappingLogger.log(subject, ontProperty, tikaProperty, resource);
+                    }
+                }
+            }
+        }
+        values.remove(null);
+        values.remove("");
+        if(values.isEmpty()){
+            return false;
+        } else {
+            for(Resource resource : values){
+                graph.add(new TripleImpl(subject, ontProperty, resource));
+            }
+            return true;
+        }
+    }
+
+    @Override
+    public Set<String> getMappedTikaProperties() {
+        return tikaProperties;
+    }
+    
+}

Propchange: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/PropertyMapping.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ResourceMapping.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ResourceMapping.java?rev=1296705&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ResourceMapping.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ResourceMapping.java Sat Mar  3 21:17:48 2012
@@ -0,0 +1,125 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.engines.tika.metadata;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.BNode;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.tika.metadata.Metadata;
+
+public final class ResourceMapping extends Mapping{
+
+    private static final Mapping[] EMPTY = new Mapping[]{};
+    
+    Collection<Mapping> required;
+    Collection<Mapping> optional;
+    Collection<Mapping> additional;
+    Set<String> mappedTikaProperties;
+    
+    public ResourceMapping(String ontProperty, Mapping...required) {
+        this(new UriRef(ontProperty), required);
+    }
+    public ResourceMapping(String ontProperty, Mapping[] required, Mapping[] optional,Mapping[] additional) {
+        this(new UriRef(ontProperty), required,optional,additional);
+    }
+
+    public ResourceMapping(UriRef ontProperty, Mapping...requried) {
+        this(ontProperty,requried,null,null);
+    }
+    public ResourceMapping(UriRef ontProperty, Mapping[] required, Mapping[] optional,Mapping[] additional) {
+        super(ontProperty,null);
+        required = required == null ? EMPTY : required;
+        optional = optional == null ? EMPTY : optional;
+        additional = additional == null ? EMPTY : additional;
+        if(required.length < 1 && optional.length <1){
+            throw new IllegalArgumentException("Neighter optional nor required subMappings where parsed!");
+        }
+        Set<String> mapped = new HashSet<String>();
+        this.required = Arrays.asList(required);
+        if(this.required.contains(null)){
+            throw new IllegalArgumentException("Tha parsed Array of required sub mappings MUST NOT contain a NULL element" +
+            		"(parsed: "+this.required+")");
+        }
+        for(Mapping m : this.required){
+            mapped.addAll(m.getMappedTikaProperties());
+        }
+        this.optional = Arrays.asList(optional);
+        if(this.optional.contains(null)){
+            throw new IllegalArgumentException("Tha parsed Array of optional sub mappings MUST NOT contain a NULL element" +
+                    "(parsed: "+this.optional+")");
+        }
+        for(Mapping m : this.optional){
+            mapped.addAll(m.getMappedTikaProperties());
+        }
+        mapped.remove(null);
+        this.mappedTikaProperties = Collections.unmodifiableSet(mapped);
+        //additional mappings
+        if(additional != null){
+            this.additional = Arrays.asList(additional);
+        } else {
+            this.additional = Collections.emptySet();
+        }
+        if(this.additional.contains(null)){
+            throw new IllegalArgumentException("Tha parsed Array of additional sub mappings MUST NOT contain a NULL element" +
+                    "(parsed: "+this.additional+")");
+        }
+        //NOTE: additional mappings are not added to the mappedTikaProperties
+    }
+
+    @Override
+    public boolean apply(MGraph graph, NonLiteral subject, Metadata metadata) {
+        boolean added = false;
+        NonLiteral s = new BNode();
+        mappingLogger.log(subject, ontProperty, null, s);
+        if(!required.isEmpty()) {
+            MGraph g = new SimpleMGraph();
+            for(Mapping m : required){
+                if(!m.apply(g, s, metadata)){
+                    return false;
+                }
+            }
+            graph.addAll(g);
+            added = true;
+        }
+        for(Mapping m : optional){
+            if(m.apply(graph, s, metadata)){
+                added = true;
+            }
+        }
+        if(added){
+            for(Mapping m : additional){
+                m.apply(graph, s, metadata);
+            }
+            graph.add(new TripleImpl(subject,ontProperty,s));
+        }
+        return added;
+    }
+    @Override
+    public Set<String> getMappedTikaProperties() {
+        return mappedTikaProperties;
+    }
+}

Propchange: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ResourceMapping.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/TypeMapping.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/TypeMapping.java?rev=1296705&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/TypeMapping.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/TypeMapping.java Sat Mar  3 21:17:48 2012
@@ -0,0 +1,36 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.engines.tika.metadata;
+
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.ontologies.RDF;
+
+/**
+ * A {@link ConstantMapping} for the property {@link RDF#type}
+ * @author Rupert Westenthaler
+ *
+ */
+public class TypeMapping extends ConstantMapping {
+
+    public TypeMapping(String type) {
+        this(new UriRef(type));
+    }
+    public TypeMapping(UriRef...types) {
+        super(RDF.type, types);
+    }
+
+}

Propchange: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/TypeMapping.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain