You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/03/03 22:17:48 UTC
svn commit: r1296705 - in
/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata:
ConstantMapping.java Mapping.java OntologyMappings.java
PropertyMapping.java ResourceMapping.java TypeMapping.java
Author: rwesten
Date: Sat Mar 3 21:17:48 2012
New Revision: 1296705
URL: http://svn.apache.org/viewvc?rev=1296705&view=rev
Log:
STANBOL-512: Added support for Tika metadata -> Ontology mappings
Added:
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ConstantMapping.java (with props)
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/Mapping.java (with props)
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/OntologyMappings.java (with props)
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/PropertyMapping.java (with props)
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ResourceMapping.java (with props)
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/TypeMapping.java (with props)
Added: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ConstantMapping.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ConstantMapping.java?rev=1296705&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ConstantMapping.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ConstantMapping.java Sat Mar 3 21:17:48 2012
@@ -0,0 +1,61 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.engines.tika.metadata;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.tika.metadata.Metadata;
+
+public class ConstantMapping extends Mapping{
+
+
+ private Collection<Resource> values;
+
+ public ConstantMapping(UriRef ontProperty, Resource...values) {
+ super(ontProperty, null);
+ if(values == null || values.length < 1){
+ throw new IllegalArgumentException("The parsed values MUST NOT be NULL nor an empty array");
+ }
+ this.values = Arrays.asList(values);
+ if(this.values.contains(null)){
+ throw new IllegalArgumentException("The parsed values MUST NOT contain a NULL element " +
+ "(parsed: "+this.values+")!");
+ }
+ }
+
+ @Override
+ public boolean apply(MGraph graph, NonLiteral subject, Metadata metadata) {
+ for(Resource value : values){
+ graph.add(new TripleImpl(subject, ontProperty, value));
+ mappingLogger.log(subject, ontProperty, null, value);
+ }
+ return true;
+ }
+
+ @Override
+ public Set<String> getMappedTikaProperties() {
+ return Collections.emptySet();
+ }
+}
Propchange: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ConstantMapping.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/Mapping.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/Mapping.java?rev=1296705&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/Mapping.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/Mapping.java Sat Mar 3 21:17:48 2012
@@ -0,0 +1,310 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.engines.tika.metadata;
+
+import static org.apache.tika.metadata.DublinCore.DATE;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.BNode;
+import org.apache.clerezza.rdf.core.InvalidLiteralTypeException;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NoConvertorException;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.PlainLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.TypedLiteral;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.impl.TypedLiteralImpl;
+import org.apache.clerezza.rdf.ontologies.RDFS;
+import org.apache.clerezza.rdf.ontologies.XSD;
+import org.apache.tika.metadata.DublinCore;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Used as value for Apache Tika {@link Metadata} mappings. Holds the
+ * ontology property as {@link UriRef} and optionally a Tika {@link Property}.
+ * Later can be used to parse the correct datatype for values contained in the
+ * {@link Metadata}
+ *
+ * @author westei
+ *
+ */
+public abstract class Mapping {
+
+ private final static Logger log = LoggerFactory.getLogger(Mapping.class);
+ private static final LiteralFactory lf = LiteralFactory.getInstance();
+
+ /**
+ * List with allowed DataTypes.<ul>
+ * <li> <code>null</code> is used for {@link PlainLiteral}s
+ * <li> {@link XSD} datatyoes are used for {@link TypedLiteral}s
+ * <li> {@link RDFS#Resource} is used for {@link NonLiteral} values. Note
+ * that only {@link UriRef} is supported, because for Tika {@link BNode}s
+ * do not make sense.
+ * </ul>
+ */
+ public static final Set<UriRef> ONT_TYPES;
+ /**
+ * Map with the same keys as contained in {@link #ONT_TYPES}. The values
+ * are the java types.
+ */
+ protected static final Map<UriRef,Class<?>> ONT_TYPE_MAP;
+
+ static {
+ //use a linked HasSetMap to have the nice ordering (mainly for logging)
+ Map<UriRef,Class<?>> map = new LinkedHashMap<UriRef,Class<?>>();
+ //Plain Literal values
+ map.put(null,null);
+ //Typed Literal values
+ map.put(XSD.anyURI,URI.class);
+ map.put(XSD.base64Binary,new byte[]{}.getClass());
+ map.put(XSD.boolean_,Boolean.class);
+ map.put(XSD.byte_,Byte.class);
+ map.put(XSD.date,Date.class);
+ map.put(XSD.dateTime,Date.class);
+ map.put(XSD.decimal,BigDecimal.class);
+ map.put(XSD.double_,Double.class);
+ map.put(XSD.float_,Float.class);
+ map.put(XSD.int_,Integer.class);
+ map.put(XSD.integer,BigInteger.class);
+ map.put(XSD.long_,Long.class);
+ map.put(XSD.short_,Short.class);
+ map.put(XSD.string,String.class);
+ map.put(XSD.time,Date.class);
+ //Data Types for NonLiteral values
+ map.put(RDFS.Resource,URI.class);
+ ONT_TYPE_MAP = Collections.unmodifiableMap(map);
+ ONT_TYPES = ONT_TYPE_MAP.keySet();
+
+ //NOTE: The following XSD types are not included
+ //XSD.gDay,XSD.gMonth,XSD.gMonthDay,XSD.gYearMonth,XSD.hexBinary,XSD.language,
+ //XSD.Name,XSD.NCName,XSD.negativeInteger,XSD.NMTOKEN,XSD.nonNegativeInteger,
+ //XSD.normalizedString,XSD.positiveInteger,
+ //XSD.token,XSD.unsignedByte,XSD.unsignedInt,XSD.unsignedLong,XSD.unsignedShort,
+ }
+
+ protected final UriRef ontProperty;
+
+ protected final Converter converter;
+ /**
+ * Getter for the OntologyProperty for this mapping
+ * @return the ontProperty
+ */
+ public final UriRef getOntologyProperty() {
+ return ontProperty;
+ }
+ /**
+ * Getter for the set of Tika {@link Metadata} key names that are used
+ * by this mapping. This is typically used to determine if based on the
+ * present {@link Metadata#names()} a mapping need to be processed or not.
+ * <p>Mappings need to be called if any of the returned keys is present in
+ * the {@link Metadata}. Mappings that return an empty list MUST BE
+ * called.
+ * @return the Tika {@link Metadata} key names that are used by this mapping.
+ * If no keys are mapped than it MUST return an empty list.
+ */
+ public abstract Set<String> getMappedTikaProperties();
+
+ protected final UriRef ontType;
+
+ protected Mapping(UriRef ontProperty,UriRef ontType){
+ this(ontProperty,ontType,null);
+ }
+ protected Mapping(UriRef ontProperty,UriRef ontType,Converter converter){
+ if(ontProperty == null){
+ throw new IllegalArgumentException("The parsed ontology property MUST NOT be NULL!");
+ }
+ this.ontProperty = ontProperty;
+ if(!ONT_TYPES.contains(ontType)){
+ throw new IllegalArgumentException("The ontology type '"+ontType
+ + "' is not supported. (supported: "+ONT_TYPES+")");
+ }
+ this.ontType = ontType;
+ this.converter = converter;
+ }
+
+ /**
+ * Applies this mapping based on the parsed {@link Metadata} and stores the
+ * results to {@link MGraph}
+ * @param graph the Graph to store the mapping results
+ * @param subject the subject (context) to add the mappings
+ * @param metadata the metadata used for applying the mapping
+ * @return <code>true</code> if the mapping could be applied based on the
+ * parsed data. Otherwise <code>false</code>. This is intended to be used
+ * by components that need to check if required mappings could be applied.
+ */
+ public abstract boolean apply(MGraph graph, NonLiteral subject, Metadata metadata);
+ /**
+ * Converts the parsed value based on the mapping information to an RDF
+ * {@link Resource}. Optionally supports also validation if the parsed
+ * value is valid for the {@link Mapping#ontType ontology type} specified by
+ * the parsed mapping.
+ * @param value the value
+ * @param mapping the mapping
+ * @param validate
+ * @return the {@link Resource} or <code>null</code> if the parsed value is
+ * <code>null</code> or {@link String#isEmpty() empty}.
+ * @throws IllegalArgumentException if the parsed {@link Mapping} is
+ * <code>null</code>
+ */
+ protected Resource toResource(String value, boolean validate){
+ Metadata dummy = null;//used for date validation
+ if(value == null || value.isEmpty()){
+ return null; //ignore null and empty values
+ }
+ Resource object;
+ if(ontType == null){
+ object = new PlainLiteralImpl(value);
+ } else if(ontType == RDFS.Resource){
+ try {
+ if(validate){
+ new URI(value);
+ }
+ object = new UriRef(value);
+ } catch (URISyntaxException e) {
+ log.warn("Unable to create Reference for value {} (not a valid URI)" +
+ " -> create a literal instead",value);
+ object = new PlainLiteralImpl(value);
+ }
+ } else { //typed literal
+ Class<?> clazz = Mapping.ONT_TYPE_MAP.get(ontType);
+ if(clazz.equals(Date.class)){ //special handling for dates :(
+ //Dates are special, because Clerezza requires W3C date format
+ //and Tika uses the iso8601 variants.
+ //Because of that here is Tika used to get the Date object for
+ //the parsed value and than the LiteralFactory of Clerezza to
+ //create the TypedLiteral.
+ //Note that because of that no validation is required for
+ //Dates.
+
+ //Need a dummy metadata object to get access to the private
+ //parseDate(..) method
+ if(dummy == null) {
+ dummy = new Metadata();
+ }
+ //any Property with the Date type could be used here
+ dummy.add(DATE.getName(), value);
+ Date date = dummy.getDate(DublinCore.DATE); //access parseDate(..)
+ if(date != null){ //now use the Clerezza Literal factory
+ object = lf.createTypedLiteral(date);
+ } else { //fall back to xsd:string
+ object = new TypedLiteralImpl(value, XSD.string);
+ }
+ } else {
+ object = new TypedLiteralImpl(value, ontType);
+ }
+ if(validate && clazz != null &&
+ !clazz.equals(Date.class)){ //we need not to validate dates
+ try {
+ lf.createObject(clazz,(TypedLiteral)object);
+ } catch (NoConvertorException e) {
+ log.info("Unable to validate typed literals of type {} because" +
+ "there is no converter for Class {} registered with Clerezza",
+ ontType,clazz);
+ } catch (InvalidLiteralTypeException e) {
+ log.info("The value '{}' is not valid for dataType {}!" +
+ "create literal with type 'xsd:string' instead",
+ value,ontType);
+ object = new TypedLiteralImpl(value, XSD.string);
+ }
+ } //else no validation needed
+ }
+ if(converter != null){
+ object = converter.convert(object);
+ }
+ return object;
+ }
+ /**
+ * Used by subclasses to log mapped information
+ */
+ protected final static MappingLogger mappingLogger = new MappingLogger();
+ /**
+ * Allows nicely formatted logging of mapped properties
+ * @author Rupert Westenthaler
+ *
+ */
+ protected static class MappingLogger{
+
+ private List<NonLiteral> subjects = new ArrayList<NonLiteral>();
+ private UriRef predicate;
+ private final int intendSize = 2;
+ private final char[] intnedArray;
+ private static final int MAX_INTEND = 5;
+
+ private MappingLogger(){
+ intnedArray = new char[MAX_INTEND*intendSize];
+ Arrays.fill(intnedArray, ' ');
+ }
+ private String getIntend(int intend){
+ return String.copyValueOf(intnedArray, 0,
+ Math.min(MAX_INTEND, intend)*intendSize);
+ }
+
+ protected void log(NonLiteral subject,UriRef predicate, String prop, Resource object){
+ if(!log.isDebugEnabled()){
+ return;
+ }
+ int intendCount = subjects.indexOf(subject)+1;
+ final String intend;
+ if(intendCount < 1){
+ subjects.add(subject);
+ intendCount = subjects.size();
+ intend = getIntend(intendCount);
+ log.debug("{}context: {}",intend,subject);
+ } else if(intendCount < subjects.size()){
+ for(int i = intendCount;i<subjects.size();i++){
+ subjects.remove(i);
+ }
+ intend = getIntend(intendCount);
+ } else {
+ intend = getIntend(intendCount);
+ }
+ if(!predicate.equals(this.predicate)){
+ log.debug("{} {}",intend,predicate);
+ }
+ log.debug("{} {} {}",new Object[]{
+ intend,object,prop != null ? ("(from: '"+prop+')') : ""
+ });
+ }
+ }
+
+ public static interface Converter {
+ Resource convert(Resource value);
+ }
+}
\ No newline at end of file
Propchange: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/Mapping.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/OntologyMappings.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/OntologyMappings.java?rev=1296705&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/OntologyMappings.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/OntologyMappings.java Sat Mar 3 21:17:48 2012
@@ -0,0 +1,505 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.engines.tika.metadata;
+
+import static java.util.Collections.disjoint;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.TypedLiteral;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.ontologies.DC;
+import org.apache.clerezza.rdf.ontologies.OWL;
+import org.apache.clerezza.rdf.ontologies.RDFS;
+import org.apache.clerezza.rdf.ontologies.SKOS;
+import org.apache.clerezza.rdf.ontologies.XSD;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
+import org.apache.tika.metadata.CreativeCommons;
+import org.apache.tika.metadata.DublinCore;
+import org.apache.tika.metadata.Geographic;
+import org.apache.tika.metadata.HttpHeaders;
+import org.apache.tika.metadata.MSOffice;
+import org.apache.tika.metadata.Message;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TIFF;
+import org.apache.tika.metadata.XMPDM;
+
+/**
+ * Defines mappings for keys used by Apache Tika in the {@link Metadata} to
+ * ontology properties.<p>
+ *
+ * @author Rupert Westenthaler
+ *
+ */
+public class OntologyMappings implements Iterable<Mapping>{
+
    /**
     * Lazily initialised shared instance returned by
     * {@link #getDefaultMappings()}.
     */
    private static OntologyMappings defaultMappings;

    /**
     * The registered mappings, keyed by the ontology property.
     * Several mappings may be registered for the same property.
     */
    private final Map<UriRef,Collection<Mapping>> mappings = new HashMap<UriRef,Collection<Mapping>>();
    /**
     * Used to protect the default mappings from modifications.
     * NOTE(review): never set within the visible code — confirm it is
     * activated where the default mappings are created.
     */
    private boolean readonly = false;
    /**
     * The media ontology namespace
     */
    private static String ma = "http://www.w3.org/ns/ma-ont#";
+
+ public static OntologyMappings getDefaultMappings(){
+ if(defaultMappings == null){
+ defaultMappings = new OntologyMappings();
+ //TODO: validate the defaults
+ addMediaResourceOntologyMappings(defaultMappings);
+ addNepomukMessageMappings(defaultMappings);
+ addRdfsMappings(defaultMappings);
+ }
+ return defaultMappings;
+ }
+
+ /**
+ * @param mappings
+ */
+ public static void addNepomukMessageMappings(OntologyMappings mappings) {
+ String nmo = "http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#";
+ mappings.addMappings(
+ new PropertyMapping(nmo+"bbc",Message.MESSAGE_BCC));
+ mappings.addMappings(
+ new PropertyMapping(nmo+"cc",Message.MESSAGE_CC));
+ mappings.addMappings(
+ new PropertyMapping(nmo+"from",Message.MESSAGE_FROM));
+ mappings.addMappings(
+ new PropertyMapping(nmo+"to",Message.MESSAGE_TO));
+ }
+
+ /**
+ * @param mappings
+ */
+ public static void addGeoMappings(OntologyMappings mappings) {
+ mappings.addMappings(
+ new PropertyMapping(NamespaceEnum.geo+"alt",XSD.double_,Geographic.ALTITUDE.getName()));
+ mappings.addMappings(
+ new PropertyMapping(NamespaceEnum.geo+"lat",XSD.double_,Geographic.LATITUDE.getName()));
+ mappings.addMappings(
+ new PropertyMapping(NamespaceEnum.geo+"long",XSD.double_,Geographic.LONGITUDE.getName()));
+ }
+
    /**
     * Maps the {@link TIFF} metadata to the Nepomuk EXIF ontology. This uses
     * the more precise datatypes as defined by {@link TIFF} rather than
     * xsd:string as defined for most of the properties within the Nepomuk
     * ontology.
     * @param mappings the ontology mappings to extend
     */
    public static void addNepomukExifMappings(OntologyMappings mappings) {
        String exif = "http://www.semanticdesktop.org/ontologies/2007/05/10/nexif#";
        mappings.addMappings(
            new PropertyMapping(exif+"bitsPerSample",XSD.int_,TIFF.BITS_PER_SAMPLE.getName()));
        mappings.addMappings(
            new PropertyMapping(exif+"make",TIFF.EQUIPMENT_MAKE.getName()));
        mappings.addMappings(
            new PropertyMapping(exif+"model",TIFF.EQUIPMENT_MODEL.getName()));
        mappings.addMappings(
            new PropertyMapping(exif+"exposureTime",XSD.double_,TIFF.EXPOSURE_TIME.getName()));
        mappings.addMappings(
            new PropertyMapping(exif+"fNumber",XSD.double_,TIFF.F_NUMBER.getName()));
        mappings.addMappings(
            new PropertyMapping(exif+"flash",XSD.boolean_,TIFF.FLASH_FIRED.getName()));
        mappings.addMappings(
            new PropertyMapping(exif+"focalLength",XSD.double_,TIFF.FOCAL_LENGTH.getName()));
        mappings.addMappings(
            new PropertyMapping(exif+"relatedImageLength",XSD.int_,TIFF.IMAGE_LENGTH.getName()));
        mappings.addMappings(
            new PropertyMapping(exif+"relatedImageWidth",XSD.int_,TIFF.IMAGE_WIDTH.getName()));
        mappings.addMappings(
            new PropertyMapping(exif+"isoSpeedRatings",XSD.int_,TIFF.ISO_SPEED_RATINGS.getName()));
        mappings.addMappings(
            new PropertyMapping(exif+"orientation",XSD.string,TIFF.ORIENTATION.getName()));
        mappings.addMappings(
            new PropertyMapping(exif+"dateTimeOriginal",XSD.dateTime,TIFF.ORIGINAL_DATE.getName()));
        mappings.addMappings(
            new PropertyMapping(exif+"xResolution",XSD.double_,TIFF.RESOLUTION_HORIZONTAL.getName()));
        mappings.addMappings(
            new PropertyMapping(exif+"resolutionUnit",XSD.string,TIFF.RESOLUTION_UNIT.getName()));
        mappings.addMappings(
            new PropertyMapping(exif+"yResolution",XSD.double_,TIFF.RESOLUTION_VERTICAL.getName()));
        mappings.addMappings(
            new PropertyMapping(exif+"samplesPerPixel",XSD.int_,TIFF.SAMPLES_PER_PIXEL.getName()));
        mappings.addMappings(
            new PropertyMapping(exif+"software",TIFF.SOFTWARE.getName()));
    }
+
    /**
     * Adds the Mappings for {@link DublinCore}<p>
     * Two mappings are added for each property<ul>
     * <li> <a href="http://dublincore.org/documents/dcmi-terms/">Dublin Core Terms</a>
     * <li> The <a href="http://www.w3.org/TR/mediaont-10/#dc-table">DC terms
     * mappings</a> of the Media Annotation Ontology.
     * </ul>
     * @param mappings The ontology mappings to add the DC mappings
     */
    public static void addDcMappings(OntologyMappings mappings) {

        //NOTE(review): this method mixes addMapping(..) and addMappings(..)
        //calls — presumably equivalent when registering a single mapping;
        //confirm against the declarations of both methods.
        mappings.addMapping(
            new PropertyMapping(DC.contributor,
                DublinCore.CONTRIBUTOR,MSOffice.LAST_AUTHOR));
        mappings.addMapping(
            new PropertyMapping(DC.coverage,DublinCore.COVERAGE));
        mappings.addMappings(
            new PropertyMapping(DC.creator,
                DublinCore.CREATOR,MSOffice.AUTHOR,"initial-creator"));
        mappings.addMappings(
            new PropertyMapping(DC.description,DublinCore.DESCRIPTION));
        mappings.addMappings(
            new PropertyMapping(DC.format,
                DublinCore.FORMAT,HttpHeaders.CONTENT_TYPE));
        mappings.addMappings(
            new PropertyMapping(DC.identifier,DublinCore.IDENTIFIER));
        mappings.addMappings(
            new PropertyMapping(DC.language,
                DublinCore.LANGUAGE,HttpHeaders.CONTENT_LANGUAGE));
        mappings.addMappings(
            new PropertyMapping(NamespaceEnum.dc+"modified",XSD.dateTime,
                DublinCore.MODIFIED,"Last-Modified"));
        mappings.addMappings(
            new PropertyMapping(DC.publisher,
                DublinCore.PUBLISHER,MSOffice.COMPANY));
        mappings.addMappings(
            new PropertyMapping(DC.relation,DublinCore.RELATION));
        mappings.addMappings(
            new PropertyMapping(DC.rights,DublinCore.RIGHTS));
        mappings.addMappings(
            new PropertyMapping(DC.source,DublinCore.SOURCE));
        mappings.addMappings(
            new PropertyMapping(DC.subject,
                DublinCore.SUBJECT,MSOffice.KEYWORDS));
        mappings.addMappings(
            new PropertyMapping(DC.title,DublinCore.TITLE));
        mappings.addMappings(
            new PropertyMapping(DC.type,DublinCore.TYPE));
        mappings.addMappings(
            new PropertyMapping(DC.date,XSD.dateTime,DublinCore.DATE.getName()));
        //MS Office -> DC
        mappings.addMappings(
            new PropertyMapping(NamespaceEnum.dc+"created",XSD.dateTime,
                MSOffice.CREATION_DATE.getName(),"created"));

    }
    /**
     * Adds mappings from Tika metadata ({@link DublinCore}, {@link MSOffice},
     * {@link XMPDM}, {@link TIFF} and {@link Geographic} keys) to the W3C
     * Ontology for Media Resources (ma-ont). Audio and video specific XMPDM
     * keys are mapped to separate <code>ma:hasTrack</code> resources; geo
     * keys are mapped to a <code>ma:hasLocation</code> resource.
     * @param mappings the ontology mappings to extend
     */
    public static void addMediaResourceOntologyMappings(OntologyMappings mappings){
        mappings.addMappings(
            new PropertyMapping(ma+"hasContributor",
                DublinCore.CONTRIBUTOR,XMPDM.ARTIST.getName(),XMPDM.COMPOSER.getName()));
        mappings.addMapping(
            new ResourceMapping(ma+"hasLocation",
                new TypeMapping(ma+"Location"),
                new PropertyMapping(ma+"locationName",DublinCore.COVERAGE)));
        mappings.addMappings(
            new PropertyMapping(ma+"hasCreator",
                DublinCore.CREATOR,MSOffice.AUTHOR,"initial-creator"));
        mappings.addMappings(
            new PropertyMapping(ma+"description",DublinCore.DESCRIPTION));
        mappings.addMappings(
            new PropertyMapping(ma+"hasFormat",
                DublinCore.FORMAT,HttpHeaders.CONTENT_TYPE));
        /*
         * Excerpt of the MA recommendation:
         * The identifier of a media resource is represented in RDF by the URI
         * of the node representing that media resource. If a resource is
         * identified by several URI, owl:sameAs should be used.
         */
        mappings.addMappings(
            new PropertyMapping(OWL.sameAs,RDFS.Resource,DublinCore.IDENTIFIER));
        mappings.addMappings(
            new PropertyMapping(ma+"hasLanguage",
                DublinCore.LANGUAGE,HttpHeaders.CONTENT_LANGUAGE));
        mappings.addMappings(
            new PropertyMapping(ma+"editDate",XSD.dateTime,
                DublinCore.MODIFIED,MSOffice.LAST_SAVED.getName()));
        mappings.addMappings(
            new PropertyMapping(ma+"hasPublisher",DublinCore.PUBLISHER));
        mappings.addMappings(
            new PropertyMapping(ma+"hasRelatedResource",DublinCore.RELATION));
        mappings.addMappings(
            new PropertyMapping(ma+"copyright",RDFS.Resource,
                //DC:rights and cc:license
                DublinCore.RIGHTS,CreativeCommons.LICENSE_LOCATION, CreativeCommons.LICENSE_URL,
                XMPDM.COPYRIGHT.getName()));
        mappings.addMappings(
            new PropertyMapping(ma+"isMemberOf",DublinCore.SOURCE));
        mappings.addMappings(
            new PropertyMapping(ma+"hasKeyword",
                DublinCore.SUBJECT,MSOffice.KEYWORDS));
        mappings.addMappings(
            new PropertyMapping(ma+"title",
                DublinCore.TITLE,XMPDM.SCENE.getName(),XMPDM.TAPE_NAME.getName(),
                XMPDM.SHOT_NAME.getName()));
        mappings.addMapping(
            new PropertyMapping(ma+"alternativeTitle", XMPDM.ALT_TAPE_NAME.getName()));
        mappings.addMapping(
            new PropertyMapping(ma+"mainOriginalTitle", XMPDM.ALBUM.getName()));
        mappings.addMappings(
            new PropertyMapping(ma+"hasGenre",
                DublinCore.TYPE,XMPDM.GENRE.getName()));
        mappings.addMappings(
            new PropertyMapping(ma+"creationDate",XSD.dateTime,
                DublinCore.DATE.getName(),MSOffice.CREATION_DATE.getName(),"created"));
        mappings.addMapping(
            new PropertyMapping(ma+"description",
                DublinCore.DESCRIPTION,MSOffice.COMMENTS));

        mappings.addMappings(
            new PropertyMapping(ma+"hasContributor",
                MSOffice.LAST_AUTHOR,MSOffice.AUTHOR,XMPDM.ENGINEER.getName()));


        //other properties -> Media Ontology
        mappings.addMappings(
            new PropertyMapping(ma+"hasCreator","producer","initial-creator"));

        //EXIF -> Media Ontology
        mappings.addMappings(
            new PropertyMapping(ma+"frameHeight",XSD.int_,TIFF.IMAGE_LENGTH.getName()));
        mappings.addMappings(
            new PropertyMapping(ma+"frameWidth",XSD.int_,TIFF.IMAGE_WIDTH.getName()));
        mappings.addMappings(
            new PropertyMapping(ma+"creationDate",XSD.dateTime,
                TIFF.ORIGINAL_DATE.getName(),XMPDM.SHOT_DATE.getName()));

        //XMP -> Media Ontology
        //here we need to split up the metadata for the audio and video
        mappings.addMapping(
            new PropertyMapping(ma+"releaseDate", XSD.dateTime,XMPDM.RELEASE_DATE.getName()));
        mappings.addMapping(new ResourceMapping(ma+"hasTrack",
            new Mapping[]{ /* none required */},
            new Mapping[]{//optional
                new PropertyMapping(ma+"hasFormat",XSD.string,XMPDM.AUDIO_CHANNEL_TYPE.getName()),
                new PropertyMapping(ma+"hasCompression",XSD.string,XMPDM.AUDIO_COMPRESSOR.getName()),
                new PropertyMapping(ma+"editDate", XMPDM.AUDIO_MOD_DATE.getName()),
                new PropertyMapping(ma+"samplingRate", XSD.int_,XMPDM.AUDIO_SAMPLE_RATE.getName())
            }, new Mapping[]{//additional
                new TypeMapping(ma+"MediaFragment"),
                new TypeMapping(ma+"Track"),
                new TypeMapping(ma+"AudioTrack"),
            }
            ));
        mappings.addMapping(new ResourceMapping(ma+"hasTrack",
            new Mapping[]{ /* none required */},
            new Mapping[]{//optional
                new PropertyMapping(ma+"hasCompression",XSD.string,XMPDM.VIDEO_COMPRESSOR.getName()),
                new PropertyMapping(ma+"editDate", XMPDM.VIDEO_MOD_DATE.getName()),
                new PropertyMapping(ma+"frameRate", XSD.double_,XMPDM.VIDEO_FRAME_RATE.getName())
            },
            new Mapping[]{ //additional
                new TypeMapping(ma+"MediaFragment"),
                new TypeMapping(ma+"Track"),
                new TypeMapping(ma+"VideoTrack"),
                new PropertyMapping(ma+"frameHeight",XSD.int_,TIFF.IMAGE_LENGTH.getName()),
                new PropertyMapping(ma+"frameWidth",XSD.int_,TIFF.IMAGE_WIDTH.getName())
            }));
        mappings.addMapping(
            new PropertyMapping(ma+"numberOfTracks",XSD.int_,XMPDM.TRACK_NUMBER.getName()));
        mappings.addMapping(
            new PropertyMapping(ma+"averageBitRate",XSD.double_,
                new Mapping.Converter(){//we need to convert from MByte/min to kByte/sec
                    @Override
                    public Resource convert(Resource value) {
                        if(value instanceof TypedLiteral &&
                                XSD.double_.equals(((TypedLiteral)value).getDataType())){
                            LiteralFactory lf = LiteralFactory.getInstance();
                            double mm = lf.createObject(Double.class, (TypedLiteral)value);
                            //MByte/min * 1024 kByte/MByte / 60 sec/min
                            return lf.createTypedLiteral(Double.valueOf(
                                mm*1024/60));
                        } else {
                            return value; //do not convert
                        }
                    }

                },XMPDM.FILE_DATA_RATE.getName()));

        //GEO -> Media Resource Ontology
        mappings.addMapping(new ResourceMapping(ma+"hasLocation",
            new Mapping[]{ //required
                new PropertyMapping(ma+"locationLatitude", XSD.double_,Geographic.LATITUDE.getName()),
                new PropertyMapping(ma+"locationLongitude", XSD.double_,Geographic.LONGITUDE.getName())
            },new Mapping[]{ //optional
                new PropertyMapping(ma+"locationAltitude", XSD.double_,Geographic.ALTITUDE.getName())
            },new Mapping[]{//additional
                new TypeMapping(ma+"Location")
            }));
    }
+//TODO
+// public static void addNepomukId3Mappings(OntologyMappings mappings){
+// XMPDM.ABS_PEAK_AUDIO_FILE_PATH;
+// XMPDM.ALBUM;
+// XMPDM.ALT_TAPE_NAME;
+// XMPDM.ARTIST;
+// XMPDM.AUDIO_CHANNEL_TYPE;
+// XMPDM.AUDIO_COMPRESSOR;
+// XMPDM.AUDIO_MOD_DATE;
+// XMPDM.AUDIO_SAMPLE_RATE;
+// XMPDM.AUDIO_SAMPLE_TYPE;
+// XMPDM.COMPOSER;
+// XMPDM.COPYRIGHT;
+// XMPDM.ENGINEER;
+// XMPDM.FILE_DATA_RATE;
+// XMPDM.GENRE;
+// XMPDM.INSTRUMENT;
+// XMPDM.KEY;
+// XMPDM.LOG_COMMENT;
+// XMPDM.LOOP;
+// XMPDM.METADATA_MOD_DATE;
+// XMPDM.NUMBER_OF_BEATS;
+// XMPDM.PULL_DOWN;
+// XMPDM.RELATIVE_PEAK_AUDIO_FILE_PATH;
+// XMPDM.RELEASE_DATE;
+// XMPDM.SCALE_TYPE;
+// XMPDM.SCENE;
+// XMPDM.SHOT_DATE;
+// XMPDM.SHOT_LOCATION;
+// XMPDM.SHOT_NAME;
+// XMPDM.SPEAKER_PLACEMENT;
+// XMPDM.STRETCH_MODE;
+// XMPDM.TAPE_NAME;
+// XMPDM.TEMPO;
+// XMPDM.TIME_SIGNATURE;
+// XMPDM.TRACK_NUMBER;
+// XMPDM.VIDEO_ALPHA_MODE;
+// XMPDM.VIDEO_ALPHA_UNITY_IS_TRANSPARENT;
+// XMPDM.VIDEO_COLOR_SPACE;
+// XMPDM.VIDEO_COMPRESSOR;
+// XMPDM.VIDEO_FIELD_ORDER;
+// XMPDM.VIDEO_FRAME_RATE;
+// XMPDM.VIDEO_MOD_DATE;
+// XMPDM.VIDEO_PIXEL_ASPECT_RATIO;
+// XMPDM.VIDEO_PIXEL_DEPTH;
+// }
+ public static void addSkosMappings(OntologyMappings mappings){
+ //DC -> SKOS
+ mappings.addMappings(
+ new PropertyMapping(SKOS.prefLabel,
+ DublinCore.TITLE));
+ mappings.addMappings(
+ new PropertyMapping(SKOS.definition,
+ DublinCore.DESCRIPTION));
+ mappings.addMappings(
+ new PropertyMapping(SKOS.notation,
+ DublinCore.IDENTIFIER));
+ //MS Office -> SKOS
+ mappings.addMappings(
+ new PropertyMapping(SKOS.note,MSOffice.COMMENTS));
+ mappings.addMappings(
+ new PropertyMapping(SKOS.editorialNote,
+ MSOffice.NOTES,XMPDM.LOG_COMMENT.getName()));
+ }
+
+ public static void addRdfsMappings(OntologyMappings mappings){
+ //DC
+ mappings.addMappings(
+ new PropertyMapping(RDFS.label,DublinCore.TITLE));
+ mappings.addMappings(
+ new PropertyMapping(RDFS.comment,DublinCore.DESCRIPTION,MSOffice.COMMENTS));
+ }
+
+ /**
+ * Maps only {@link CreativeCommons#LICENSE_URL} to cc:license
+ * @param mappings
+ */
+ public static void addCreativeCommonsMappings(OntologyMappings mappings){
+ mappings.addMapping(
+ new PropertyMapping("http://creativecommons.org/ns#license",RDFS.Resource,
+ CreativeCommons.LICENSE_URL,CreativeCommons.LICENSE_LOCATION));
+
+ }
+
+
+ public void addMappings(Mapping...mappings){
+ if(mappings == null || mappings.length > 1){
+ return; //nothing to do
+ }
+ for(Mapping m : mappings){
+ addMapping(m);
+ }
+ }
+ public void addMapping(Mapping mapping){
+ if(readonly){
+ throw new IllegalStateException("This "+getClass().getSimpleName()+" instance is read only!");
+ }
+ if(mapping == null){
+ return; //nothing to do
+ }
+ Collection<Mapping> propMappings = this.mappings.get(mapping.getOntologyProperty());
+ if(propMappings == null){
+ propMappings = new HashSet<Mapping>();
+ this.mappings.put(mapping.getOntologyProperty(), propMappings);
+ }
+ propMappings.add(mapping);
+ }
+ public void removePropertyMappings(UriRef property){
+ if(readonly){
+ throw new IllegalStateException("This "+getClass().getSimpleName()+" instance is read only!");
+ }
+ this.mappings.remove(property);
+ }
+
+ public void apply(MGraph graph, UriRef context, Metadata metadata){
+ Set<String> keys = new HashSet<String>(Arrays.asList(metadata.names()));
+ for(Mapping mapping : this){
+ if(mapping.getMappedTikaProperties().isEmpty() ||
+ !disjoint(keys, mapping.getMappedTikaProperties())){
+ mapping.apply(graph, context, metadata);
+ }
+ }
+ }
+ @Override
+ public Iterator<Mapping> iterator() {
+ return new Iterator<Mapping>() {
+ Iterator<Collection<Mapping>> mappingsIt = OntologyMappings.this.mappings.values().iterator();
+ Iterator<Mapping> mappingIt = Collections.EMPTY_LIST.iterator();
+ @Override
+ public boolean hasNext() {
+ //assumes no empty lists as values of OntologyMappings.this.mappings
+ return mappingIt.hasNext() || mappingsIt.hasNext();
+ }
+
+ @Override
+ public Mapping next() {
+ //assumes no empty lists as values of OntologyMappings.this.mappings
+ if(!mappingIt.hasNext()){
+ mappingIt = mappingsIt.next().iterator();
+ }
+ return mappingIt.next();
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException("Removal not Supported!");
+ }
+
+ };
+ }
+
+}
Propchange: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/OntologyMappings.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/PropertyMapping.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/PropertyMapping.java?rev=1296705&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/PropertyMapping.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/PropertyMapping.java Sat Mar 3 21:17:48 2012
@@ -0,0 +1,102 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.engines.tika.metadata;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.tika.metadata.Metadata;
+
+public final class PropertyMapping extends Mapping {
+
+ /**
+ * A Set with the mapped properties
+ */
+ protected final Set<String> tikaProperties;
+
+ public PropertyMapping(String ontProperty, UriRef ontType,String...tikaProperties) {
+ this(ontProperty == null? null : new UriRef(ontProperty), ontType,tikaProperties);
+ }
+ public PropertyMapping(String ontProperty, UriRef ontType,Converter converter,String...tikaProperties) {
+ this(ontProperty == null? null : new UriRef(ontProperty), ontType,converter,tikaProperties);
+ }
+
+ public PropertyMapping(String ontProperty,String...tikaProperties) {
+ this(ontProperty == null? null : new UriRef(ontProperty),null,tikaProperties);
+ }
+
+ public PropertyMapping(UriRef ontProperty,String...tikaProperties) {
+ this(ontProperty,null,tikaProperties);
+ }
+ public PropertyMapping(UriRef ontProperty, UriRef ontType,String...tikaProperties) {
+ this(ontProperty,ontType,null,tikaProperties);
+ }
+ public PropertyMapping(UriRef ontProperty, UriRef ontType,Converter converter,String...tikaProperties) {
+ super(ontProperty, ontType,converter);
+ if(tikaProperties == null || tikaProperties.length < 1){
+ throw new IllegalArgumentException("The list of parsed Tika properties MUST NOT be NULL nor empty!");
+ }
+ this.tikaProperties = Collections.unmodifiableSet(new HashSet<String>(
+ Arrays.asList(tikaProperties)));
+ if(this.tikaProperties.contains(null) || this.tikaProperties.contains("")){
+ throw new IllegalArgumentException("Teh parsed list of Tika properties MUST NOT " +
+ "contain NULL or empty members (parsed: "+Arrays.toString(tikaProperties)+")!");
+ }
+ }
+
+ @Override
+ public boolean apply(MGraph graph, NonLiteral subject, Metadata metadata) {
+ Set<Resource> values = new HashSet<Resource>();
+ for(String tikaProperty : tikaProperties){
+ String[] tikaPropValues = metadata.getValues(tikaProperty);
+ if(tikaPropValues != null && tikaPropValues.length > 0){
+ for(String tikaPropValue : tikaPropValues){
+ Resource resource = toResource(tikaPropValue, true);
+ if(resource != null){
+ values.add(resource);
+ mappingLogger.log(subject, ontProperty, tikaProperty, resource);
+ }
+ }
+ }
+ }
+ values.remove(null);
+ values.remove("");
+ if(values.isEmpty()){
+ return false;
+ } else {
+ for(Resource resource : values){
+ graph.add(new TripleImpl(subject, ontProperty, resource));
+ }
+ return true;
+ }
+ }
+
+ @Override
+ public Set<String> getMappedTikaProperties() {
+ return tikaProperties;
+ }
+
+}
Propchange: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/PropertyMapping.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ResourceMapping.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ResourceMapping.java?rev=1296705&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ResourceMapping.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ResourceMapping.java Sat Mar 3 21:17:48 2012
@@ -0,0 +1,125 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.engines.tika.metadata;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.BNode;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.tika.metadata.Metadata;
+
+public final class ResourceMapping extends Mapping{
+
+ private static final Mapping[] EMPTY = new Mapping[]{};
+
+ Collection<Mapping> required;
+ Collection<Mapping> optional;
+ Collection<Mapping> additional;
+ Set<String> mappedTikaProperties;
+
+ public ResourceMapping(String ontProperty, Mapping...required) {
+ this(new UriRef(ontProperty), required);
+ }
+ public ResourceMapping(String ontProperty, Mapping[] required, Mapping[] optional,Mapping[] additional) {
+ this(new UriRef(ontProperty), required,optional,additional);
+ }
+
+ public ResourceMapping(UriRef ontProperty, Mapping...requried) {
+ this(ontProperty,requried,null,null);
+ }
+ public ResourceMapping(UriRef ontProperty, Mapping[] required, Mapping[] optional,Mapping[] additional) {
+ super(ontProperty,null);
+ required = required == null ? EMPTY : required;
+ optional = optional == null ? EMPTY : optional;
+ additional = additional == null ? EMPTY : additional;
+ if(required.length < 1 && optional.length <1){
+ throw new IllegalArgumentException("Neighter optional nor required subMappings where parsed!");
+ }
+ Set<String> mapped = new HashSet<String>();
+ this.required = Arrays.asList(required);
+ if(this.required.contains(null)){
+ throw new IllegalArgumentException("Tha parsed Array of required sub mappings MUST NOT contain a NULL element" +
+ "(parsed: "+this.required+")");
+ }
+ for(Mapping m : this.required){
+ mapped.addAll(m.getMappedTikaProperties());
+ }
+ this.optional = Arrays.asList(optional);
+ if(this.optional.contains(null)){
+ throw new IllegalArgumentException("Tha parsed Array of optional sub mappings MUST NOT contain a NULL element" +
+ "(parsed: "+this.optional+")");
+ }
+ for(Mapping m : this.optional){
+ mapped.addAll(m.getMappedTikaProperties());
+ }
+ mapped.remove(null);
+ this.mappedTikaProperties = Collections.unmodifiableSet(mapped);
+ //additional mappings
+ if(additional != null){
+ this.additional = Arrays.asList(additional);
+ } else {
+ this.additional = Collections.emptySet();
+ }
+ if(this.additional.contains(null)){
+ throw new IllegalArgumentException("Tha parsed Array of additional sub mappings MUST NOT contain a NULL element" +
+ "(parsed: "+this.additional+")");
+ }
+ //NOTE: additional mappings are not added to the mappedTikaProperties
+ }
+
+ @Override
+ public boolean apply(MGraph graph, NonLiteral subject, Metadata metadata) {
+ boolean added = false;
+ NonLiteral s = new BNode();
+ mappingLogger.log(subject, ontProperty, null, s);
+ if(!required.isEmpty()) {
+ MGraph g = new SimpleMGraph();
+ for(Mapping m : required){
+ if(!m.apply(g, s, metadata)){
+ return false;
+ }
+ }
+ graph.addAll(g);
+ added = true;
+ }
+ for(Mapping m : optional){
+ if(m.apply(graph, s, metadata)){
+ added = true;
+ }
+ }
+ if(added){
+ for(Mapping m : additional){
+ m.apply(graph, s, metadata);
+ }
+ graph.add(new TripleImpl(subject,ontProperty,s));
+ }
+ return added;
+ }
+ @Override
+ public Set<String> getMappedTikaProperties() {
+ return mappedTikaProperties;
+ }
+}
Propchange: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/ResourceMapping.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/TypeMapping.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/TypeMapping.java?rev=1296705&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/TypeMapping.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/TypeMapping.java Sat Mar 3 21:17:48 2012
@@ -0,0 +1,36 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.engines.tika.metadata;
+
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.ontologies.RDF;
+
+/**
+ * A {@link ConstantMapping} for the property {@link RDF#type}
+ * @author Rupert Westenthaler
+ *
+ */
+public class TypeMapping extends ConstantMapping {
+
+ public TypeMapping(String type) {
+ this(new UriRef(type));
+ }
+ public TypeMapping(UriRef...types) {
+ super(RDF.type, types);
+ }
+
+}
Propchange: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/TypeMapping.java
------------------------------------------------------------------------------
svn:mime-type = text/plain