You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@clerezza.apache.org by re...@apache.org on 2013/10/15 16:19:36 UTC
svn commit: r1532350 - in /clerezza/trunk/rdf.utils/src/main:
java/org/apache/clerezza/rdf/utils/
java/org/apache/clerezza/rdf/utils/smushing/ resources/
Author: reto
Date: Tue Oct 15 14:19:35 2013
New Revision: 1532350
URL: http://svn.apache.org/r1532350
Log:
CLEREZZA-823: separate smusher classes allow overwriting the getPreferedIri method
Added:
clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/
clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/BaseSmusher.java
clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/IfpSmusher.java
- copied, changed from r1532294, clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/SameAsSmusher.java
- copied, changed from r1532294, clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
clerezza/trunk/rdf.utils/src/main/resources/
Modified:
clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
Modified: clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
URL: http://svn.apache.org/viewvc/clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java?rev=1532350&r1=1532349&r2=1532350&view=diff
==============================================================================
--- clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java (original)
+++ clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java Tue Oct 15 14:19:35 2013
@@ -18,268 +18,52 @@
*/
package org.apache.clerezza.rdf.utils;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.clerezza.rdf.core.BNode;
import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.NonLiteral;
-import org.apache.clerezza.rdf.core.Resource;
-import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.TripleCollection;
-import org.apache.clerezza.rdf.core.UriRef;
-import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
-import org.apache.clerezza.rdf.core.impl.TripleImpl;
-import org.apache.clerezza.rdf.ontologies.OWL;
-import org.apache.clerezza.rdf.ontologies.RDF;
+import org.apache.clerezza.rdf.core.access.LockableMGraph;
+import org.apache.clerezza.rdf.core.access.LockableMGraphWrapper;
+import org.apache.clerezza.rdf.utils.smushing.IfpSmusher;
+import org.apache.clerezza.rdf.utils.smushing.SameAsSmusher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * A utility to equate duplicate nodes in an Mgarph, currently only nodes with
- * a shared ifp are equated.
- *
+ * A utility to smush equivalent resources. For greater flexibility use the
+ * classes in the smushing package.
+ *
* @author reto
*/
public class Smusher {
-
+
static final Logger log = LoggerFactory.getLogger(Smusher.class);
/**
- * smush mGaph given the ontological facts. Currently it does only
- * one step ifp smushin, i.e. only ifps are taken in account and only
- * nodes that have the same node as ifp object in the orignal graph are
- * equates. (calling the method a second time might lead to additional
- * smushings.)
+ * Smushes mGraph given the ontological facts. Currently it does only one-step
+ * IFP smushing, i.e. only IFPs are taken into account and only nodes that have
+ * the same node as IFP object in the original graph are equated. (Calling
+ * the method a second time might lead to additional smushings.)
*
* @param mGraph
* @param tBox
*/
public static void smush(MGraph mGraph, TripleCollection tBox) {
- final Set<UriRef> ifps = getIfps(tBox);
- final Map<PredicateObject, Set<NonLiteral>> ifp2nodesMap = new HashMap<PredicateObject, Set<NonLiteral>>();
- for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
- final Triple triple = it.next();
- final UriRef predicate = triple.getPredicate();
- if (!ifps.contains(predicate)) {
- continue;
- }
- final PredicateObject po = new PredicateObject(predicate, triple.getObject());
- Set<NonLiteral> equivalentNodes = ifp2nodesMap.get(po);
- if (equivalentNodes == null) {
- equivalentNodes = new HashSet<NonLiteral>();
- ifp2nodesMap.put(po, equivalentNodes);
- }
- equivalentNodes.add(triple.getSubject());
- }
- Set<Set<NonLiteral>> unitedEquivalenceSets = uniteSetsWithCommonElement(ifp2nodesMap.values());
- smush(mGraph, unitedEquivalenceSets);
+ smush(lockable(mGraph), tBox);
}
-
+
public static void sameAsSmush(MGraph mGraph, TripleCollection owlSameStatements) {
-
- log.info("Starting smushing");
-
- // This hashmap contains a uri (key) and the set of equivalent uris (value)
- final Map<NonLiteral, Set<NonLiteral>> node2EquivalenceSet = new HashMap<NonLiteral, Set<NonLiteral>>();
-
- log.info("Creating the sets of equivalent uris of each subject or object in the owl:sameAs statements");
- // Determines for each subject and object in all the owl:sameAs statements the set of ewquivalent uris
- for (Iterator<Triple> it = owlSameStatements.iterator(); it.hasNext();) {
- final Triple triple = it.next();
- final UriRef predicate = triple.getPredicate();
- if (!predicate.equals(OWL.sameAs)) {
- throw new RuntimeException("Statements must use only <http://www.w3.org/2002/07/owl#sameAs> predicate.");
- }
- final NonLiteral subject = triple.getSubject();
- final NonLiteral object = (NonLiteral)triple.getObject();
-
- Set<NonLiteral> equivalentNodes = node2EquivalenceSet.get(subject);
-
- // if there is not a set of equivalent uris then create a new set
- if (equivalentNodes == null) {
- equivalentNodes = node2EquivalenceSet.get(object);
- if (equivalentNodes == null) {
- equivalentNodes = new HashSet<NonLiteral>();
- }
- }
-
- // add both subject and object of the owl:sameAs statement to the set of equivalent uris
- equivalentNodes.add(subject);
- equivalentNodes.add(object);
-
- // use both uris in the owl:sameAs statement as keys for the set of equivalent uris
- node2EquivalenceSet.put(subject, equivalentNodes);
- node2EquivalenceSet.put(object, equivalentNodes);
-
- log.info("Sets of equivalent uris created.");
-
- }
-
- // This set contains the sets of equivalent uris
- Set<Set<NonLiteral>> unitedEquivalenceSets = new HashSet<Set<NonLiteral>>(node2EquivalenceSet.values());
- smush(mGraph, unitedEquivalenceSets);
+ sameAsSmush(lockable(mGraph), owlSameStatements);
}
- public static void smush(MGraph mGraph, Set<Set<NonLiteral>> unitedEquivalenceSets) {
- Map<NonLiteral, NonLiteral> current2ReplacementMap = new HashMap<NonLiteral, NonLiteral>();
- final MGraph owlSameAsGraph = new SimpleMGraph();
- for (Set<NonLiteral> equivalenceSet : unitedEquivalenceSets) {
- final NonLiteral replacement = getReplacementFor(equivalenceSet, owlSameAsGraph);
- for (NonLiteral current : equivalenceSet) {
- if (!current.equals(replacement)) {
- current2ReplacementMap.put(current, replacement);
- }
- }
- }
- final Set<Triple> newTriples = new HashSet<Triple>();
- for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
- final Triple triple = it.next();
- Triple replacementTriple = null;
- final NonLiteral subject = triple.getSubject();
- NonLiteral subjectReplacement =
- current2ReplacementMap.get(subject);
- final Resource object = triple.getObject();
- @SuppressWarnings("element-type-mismatch")
- Resource objectReplacement = current2ReplacementMap.get(object);
- if ((subjectReplacement != null) || (objectReplacement != null)) {
- it.remove();
- if (subjectReplacement == null) {
- subjectReplacement = subject;
- }
- if (objectReplacement == null) {
- objectReplacement = object;
- }
- newTriples.add(new TripleImpl(subjectReplacement,
- triple.getPredicate(), objectReplacement));
- }
- }
- for (Triple triple : newTriples) {
- mGraph.add(triple);
- }
- mGraph.addAll(owlSameAsGraph);
- }
-
- private static Set<UriRef> getIfps(TripleCollection tBox) {
- final Iterator<Triple> ifpDefinitions = tBox.filter(null, RDF.type,
- OWL.InverseFunctionalProperty);
- final Set<UriRef> ifps = new HashSet<UriRef>();
- while (ifpDefinitions.hasNext()) {
- final Triple triple = ifpDefinitions.next();
- ifps.add((UriRef) triple.getSubject());
- }
- return ifps;
- }
-
- private static NonLiteral getReplacementFor(Set<NonLiteral> equivalenceSet,
- MGraph owlSameAsGraph) {
- final Set<UriRef> uriRefs = new HashSet<UriRef>();
- for (NonLiteral nonLiteral : equivalenceSet) {
- if (nonLiteral instanceof UriRef) {
- uriRefs.add((UriRef) nonLiteral);
- }
- }
- switch (uriRefs.size()) {
- case 1:
- return uriRefs.iterator().next();
- case 0:
- return new BNode();
- }
- final Iterator<UriRef> uriRefIter = uriRefs.iterator();
- //instead of an arbitrary one we might either decide lexicographically
- //or look at their frequency in mGraph
- final UriRef first = uriRefIter.next();
- while (uriRefIter.hasNext()) {
- UriRef uriRef = uriRefIter.next();
- owlSameAsGraph.add(new TripleImpl(uriRef, OWL.sameAs, first));
- }
- return first;
- }
-
- private static <T> Set<Set<T>> uniteSetsWithCommonElement(
- Collection<Set<T>> originalSets) {
- Set<Set<T>> result = new HashSet<Set<T>>();
- Iterator<Set<T>> iter = originalSets.iterator();
- while (iter.hasNext()) {
- Set<T> originalSet = iter.next();
- Set<T> matchingSet = getMatchinSet(originalSet, result);
- if (matchingSet != null) {
- matchingSet.addAll(originalSet);
- } else {
- result.add(new HashSet<T>(originalSet));
- }
- }
- if (result.size() < originalSets.size()) {
- return uniteSetsWithCommonElement(result);
- } else {
- return result;
- }
+ public static void smush(LockableMGraph mGraph, TripleCollection tBox) {
+ new IfpSmusher().smush(mGraph, tBox);
}
- private static <T> Set<T> getMatchinSet(Set<T> set, Set<Set<T>> setOfSet) {
- for (Set<T> current : setOfSet) {
- if (shareElements(set,current)) {
- return current;
- }
- }
- return null;
+ public static void sameAsSmush(LockableMGraph mGraph, TripleCollection owlSameStatements) {
+ new SameAsSmusher().smush(mGraph, owlSameStatements, true);
}
- private static <T> boolean shareElements(Set<T> set1, Set<T> set2) {
- for (T elem : set2) {
- if (set1.contains(elem)) {
- return true;
- }
- }
- return false;
+ private static LockableMGraph lockable(MGraph mGraph) {
+ return mGraph instanceof LockableMGraph ?
+ (LockableMGraph) mGraph : new LockableMGraphWrapper(mGraph);
}
-
-
- static class PredicateObject {
-
- final UriRef predicate;
- final Resource object;
-
- public PredicateObject(UriRef predicate, Resource object) {
- this.predicate = predicate;
- this.object = object;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (obj == null) {
- return false;
- }
- if (getClass() != obj.getClass()) {
- return false;
- }
- final PredicateObject other = (PredicateObject) obj;
- if (this.predicate != other.predicate && !this.predicate.equals(other.predicate)) {
- return false;
- }
- if (this.object != other.object && !this.object.equals(other.object)) {
- return false;
- }
- return true;
- }
-
- @Override
- public int hashCode() {
- int hash = 3;
- hash = 29 * hash + this.predicate.hashCode();
- hash = 13 * hash + this.object.hashCode();
- return hash;
- }
-
- @Override
- public String toString() {
- return "("+predicate+", "+object+")";
- }
-
-
- };
}
Added: clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/BaseSmusher.java
URL: http://svn.apache.org/viewvc/clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/BaseSmusher.java?rev=1532350&view=auto
==============================================================================
--- clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/BaseSmusher.java (added)
+++ clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/BaseSmusher.java Tue Oct 15 14:19:35 2013
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.clerezza.rdf.utils.smushing;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.locks.Lock;
+import org.apache.clerezza.rdf.core.BNode;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.access.LockableMGraph;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.ontologies.OWL;
+
+/**
+ * Base class for smushers: merges the resources of each equivalence set into
+ * a single replacement resource.
+ *
+ * @author Reto
+ */
+public class BaseSmusher {
+
+    /**
+     * Smushes the resources in mGraph that belong to the same set in
+     * equivalenceSets, i.e. it rewrites every triple so that it refers to a
+     * single replacement resource per equivalence set.
+     *
+     * Optionally owl:sameAs statements are added that point from the IRIs that
+     * no longer have properties to the one with properties. If addOwlSameAs
+     * is false the IRIs will just disappear from the graph.
+     *
+     * The graph is modified while holding its write lock.
+     *
+     * @param mGraph the graph to smush
+     * @param equivalenceSets sets of equivalent resources
+     * @param addOwlSameAs whether owl:sameAs statements should be added
+     */
+    public void smush(LockableMGraph mGraph, Set<Set<NonLiteral>> equivalenceSets, boolean addOwlSameAs) {
+        final Map<NonLiteral, NonLiteral> current2ReplacementMap = new HashMap<NonLiteral, NonLiteral>();
+        final MGraph owlSameAsGraph = new SimpleMGraph();
+        for (Set<NonLiteral> equivalenceSet : equivalenceSets) {
+            final NonLiteral replacement = getReplacementFor(equivalenceSet, owlSameAsGraph);
+            for (NonLiteral current : equivalenceSet) {
+                if (!current.equals(replacement)) {
+                    current2ReplacementMap.put(current, replacement);
+                }
+            }
+        }
+        final Set<Triple> newTriples = new HashSet<Triple>();
+        final Lock l = mGraph.getLock().writeLock();
+        l.lock();
+        try {
+            for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
+                final Triple triple = it.next();
+                final NonLiteral subject = triple.getSubject();
+                NonLiteral subjectReplacement = current2ReplacementMap.get(subject);
+                final Resource object = triple.getObject();
+                @SuppressWarnings(value = "element-type-mismatch")
+                Resource objectReplacement = current2ReplacementMap.get(object);
+                if ((subjectReplacement != null) || (objectReplacement != null)) {
+                    it.remove();
+                    if (subjectReplacement == null) {
+                        subjectReplacement = subject;
+                    }
+                    if (objectReplacement == null) {
+                        objectReplacement = object;
+                    }
+                    newTriples.add(new TripleImpl(subjectReplacement, triple.getPredicate(), objectReplacement));
+                }
+            }
+            // rewritten triples are added only after the iteration is over
+            for (Triple triple : newTriples) {
+                mGraph.add(triple);
+            }
+            if (addOwlSameAs) {
+                mGraph.addAll(owlSameAsGraph);
+            }
+        } finally {
+            l.unlock();
+        }
+    }
+
+    /**
+     * Chooses the resource that replaces all members of an equivalence set.
+     * A set without IRIs is replaced by a fresh BNode, a set with exactly one
+     * IRI by that IRI; otherwise getPreferedIri decides, and an owl:sameAs
+     * statement from every other IRI to the chosen one is recorded.
+     *
+     * @param equivalenceSet the resources considered equivalent
+     * @param owlSameAsGraph collects the owl:sameAs statements
+     * @return the replacement resource
+     */
+    private NonLiteral getReplacementFor(Set<NonLiteral> equivalenceSet,
+            MGraph owlSameAsGraph) {
+        final Set<UriRef> uriRefs = new HashSet<UriRef>();
+        for (NonLiteral nonLiteral : equivalenceSet) {
+            if (nonLiteral instanceof UriRef) {
+                uriRefs.add((UriRef) nonLiteral);
+            }
+        }
+        switch (uriRefs.size()) {
+            case 1:
+                return uriRefs.iterator().next();
+            case 0:
+                return new BNode();
+        }
+        final UriRef preferedIri = getPreferedIri(uriRefs);
+        for (UriRef uriRef : uriRefs) {
+            if (!uriRef.equals(preferedIri)) {
+                owlSameAsGraph.add(new TripleImpl(uriRef, OWL.sameAs, preferedIri));
+            }
+        }
+        return preferedIri;
+    }
+
+    /**
+     * Returns a prefered IRI for the IRIs in a set. Typically and in the
+     * default implementation the IRI will be one of the set. Note however that
+     * subclass implementations may also return another IRI to be used.
+     *
+     * @param uriRefs the equivalent IRIs; contains at least two elements when
+     *        called from this class
+     * @return the IRI that replaces all IRIs in the set
+     */
+    protected UriRef getPreferedIri(Set<UriRef> uriRefs) {
+        //instead of an arbitrary one we might either decide lexicographically
+        //or look at their frequency in mGraph
+        return uriRefs.iterator().next();
+    }
+}
Copied: clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/IfpSmusher.java (from r1532294, clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java)
URL: http://svn.apache.org/viewvc/clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/IfpSmusher.java?p2=clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/IfpSmusher.java&p1=clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java&r1=1532294&r2=1532350&rev=1532350&view=diff
==============================================================================
--- clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java (original)
+++ clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/IfpSmusher.java Tue Oct 15 14:19:35 2013
@@ -16,8 +16,9 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.clerezza.rdf.utils;
+package org.apache.clerezza.rdf.utils.smushing;
+import org.apache.clerezza.rdf.utils.*;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
@@ -32,6 +33,7 @@ import org.apache.clerezza.rdf.core.Reso
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.access.LockableMGraph;
import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.apache.clerezza.rdf.ontologies.OWL;
@@ -45,9 +47,9 @@ import org.slf4j.LoggerFactory;
*
* @author reto
*/
-public class Smusher {
+public class IfpSmusher extends BaseSmusher {
- static final Logger log = LoggerFactory.getLogger(Smusher.class);
+ static final Logger log = LoggerFactory.getLogger(IfpSmusher.class);
/**
* smush mGaph given the ontological facts. Currently it does only
@@ -59,7 +61,7 @@ public class Smusher {
* @param mGraph
* @param tBox
*/
- public static void smush(MGraph mGraph, TripleCollection tBox) {
+ public void smush(LockableMGraph mGraph, TripleCollection tBox) {
final Set<UriRef> ifps = getIfps(tBox);
final Map<PredicateObject, Set<NonLiteral>> ifp2nodesMap = new HashMap<PredicateObject, Set<NonLiteral>>();
for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
@@ -77,94 +79,11 @@ public class Smusher {
equivalentNodes.add(triple.getSubject());
}
Set<Set<NonLiteral>> unitedEquivalenceSets = uniteSetsWithCommonElement(ifp2nodesMap.values());
- smush(mGraph, unitedEquivalenceSets);
+ smush(mGraph, unitedEquivalenceSets, true);
}
- public static void sameAsSmush(MGraph mGraph, TripleCollection owlSameStatements) {
-
- log.info("Starting smushing");
-
- // This hashmap contains a uri (key) and the set of equivalent uris (value)
- final Map<NonLiteral, Set<NonLiteral>> node2EquivalenceSet = new HashMap<NonLiteral, Set<NonLiteral>>();
-
- log.info("Creating the sets of equivalent uris of each subject or object in the owl:sameAs statements");
- // Determines for each subject and object in all the owl:sameAs statements the set of ewquivalent uris
- for (Iterator<Triple> it = owlSameStatements.iterator(); it.hasNext();) {
- final Triple triple = it.next();
- final UriRef predicate = triple.getPredicate();
- if (!predicate.equals(OWL.sameAs)) {
- throw new RuntimeException("Statements must use only <http://www.w3.org/2002/07/owl#sameAs> predicate.");
- }
- final NonLiteral subject = triple.getSubject();
- final NonLiteral object = (NonLiteral)triple.getObject();
-
- Set<NonLiteral> equivalentNodes = node2EquivalenceSet.get(subject);
-
- // if there is not a set of equivalent uris then create a new set
- if (equivalentNodes == null) {
- equivalentNodes = node2EquivalenceSet.get(object);
- if (equivalentNodes == null) {
- equivalentNodes = new HashSet<NonLiteral>();
- }
- }
-
- // add both subject and object of the owl:sameAs statement to the set of equivalent uris
- equivalentNodes.add(subject);
- equivalentNodes.add(object);
-
- // use both uris in the owl:sameAs statement as keys for the set of equivalent uris
- node2EquivalenceSet.put(subject, equivalentNodes);
- node2EquivalenceSet.put(object, equivalentNodes);
-
- log.info("Sets of equivalent uris created.");
-
- }
-
- // This set contains the sets of equivalent uris
- Set<Set<NonLiteral>> unitedEquivalenceSets = new HashSet<Set<NonLiteral>>(node2EquivalenceSet.values());
- smush(mGraph, unitedEquivalenceSets);
- }
-
- public static void smush(MGraph mGraph, Set<Set<NonLiteral>> unitedEquivalenceSets) {
- Map<NonLiteral, NonLiteral> current2ReplacementMap = new HashMap<NonLiteral, NonLiteral>();
- final MGraph owlSameAsGraph = new SimpleMGraph();
- for (Set<NonLiteral> equivalenceSet : unitedEquivalenceSets) {
- final NonLiteral replacement = getReplacementFor(equivalenceSet, owlSameAsGraph);
- for (NonLiteral current : equivalenceSet) {
- if (!current.equals(replacement)) {
- current2ReplacementMap.put(current, replacement);
- }
- }
- }
- final Set<Triple> newTriples = new HashSet<Triple>();
- for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
- final Triple triple = it.next();
- Triple replacementTriple = null;
- final NonLiteral subject = triple.getSubject();
- NonLiteral subjectReplacement =
- current2ReplacementMap.get(subject);
- final Resource object = triple.getObject();
- @SuppressWarnings("element-type-mismatch")
- Resource objectReplacement = current2ReplacementMap.get(object);
- if ((subjectReplacement != null) || (objectReplacement != null)) {
- it.remove();
- if (subjectReplacement == null) {
- subjectReplacement = subject;
- }
- if (objectReplacement == null) {
- objectReplacement = object;
- }
- newTriples.add(new TripleImpl(subjectReplacement,
- triple.getPredicate(), objectReplacement));
- }
- }
- for (Triple triple : newTriples) {
- mGraph.add(triple);
- }
- mGraph.addAll(owlSameAsGraph);
- }
- private static Set<UriRef> getIfps(TripleCollection tBox) {
+ private Set<UriRef> getIfps(TripleCollection tBox) {
final Iterator<Triple> ifpDefinitions = tBox.filter(null, RDF.type,
OWL.InverseFunctionalProperty);
final Set<UriRef> ifps = new HashSet<UriRef>();
@@ -175,37 +94,13 @@ public class Smusher {
return ifps;
}
- private static NonLiteral getReplacementFor(Set<NonLiteral> equivalenceSet,
- MGraph owlSameAsGraph) {
- final Set<UriRef> uriRefs = new HashSet<UriRef>();
- for (NonLiteral nonLiteral : equivalenceSet) {
- if (nonLiteral instanceof UriRef) {
- uriRefs.add((UriRef) nonLiteral);
- }
- }
- switch (uriRefs.size()) {
- case 1:
- return uriRefs.iterator().next();
- case 0:
- return new BNode();
- }
- final Iterator<UriRef> uriRefIter = uriRefs.iterator();
- //instead of an arbitrary one we might either decide lexicographically
- //or look at their frequency in mGraph
- final UriRef first = uriRefIter.next();
- while (uriRefIter.hasNext()) {
- UriRef uriRef = uriRefIter.next();
- owlSameAsGraph.add(new TripleImpl(uriRef, OWL.sameAs, first));
- }
- return first;
- }
-
- private static <T> Set<Set<T>> uniteSetsWithCommonElement(
+ private <T> Set<Set<T>> uniteSetsWithCommonElement(
Collection<Set<T>> originalSets) {
Set<Set<T>> result = new HashSet<Set<T>>();
Iterator<Set<T>> iter = originalSets.iterator();
while (iter.hasNext()) {
Set<T> originalSet = iter.next();
+ //TODO this could be done more efficiently with a map
Set<T> matchingSet = getMatchinSet(originalSet, result);
if (matchingSet != null) {
matchingSet.addAll(originalSet);
@@ -220,7 +115,7 @@ public class Smusher {
}
}
- private static <T> Set<T> getMatchinSet(Set<T> set, Set<Set<T>> setOfSet) {
+ private <T> Set<T> getMatchinSet(Set<T> set, Set<Set<T>> setOfSet) {
for (Set<T> current : setOfSet) {
if (shareElements(set,current)) {
return current;
@@ -229,7 +124,7 @@ public class Smusher {
return null;
}
- private static <T> boolean shareElements(Set<T> set1, Set<T> set2) {
+ private <T> boolean shareElements(Set<T> set1, Set<T> set2) {
for (T elem : set2) {
if (set1.contains(elem)) {
return true;
@@ -239,7 +134,7 @@ public class Smusher {
}
- static class PredicateObject {
+ class PredicateObject {
final UriRef predicate;
final Resource object;
Copied: clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/SameAsSmusher.java (from r1532294, clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java)
URL: http://svn.apache.org/viewvc/clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/SameAsSmusher.java?p2=clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/SameAsSmusher.java&p1=clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java&r1=1532294&r2=1532350&rev=1532350&view=diff
==============================================================================
--- clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java (original)
+++ clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/SameAsSmusher.java Tue Oct 15 14:19:35 2013
@@ -16,71 +16,43 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.clerezza.rdf.utils;
+package org.apache.clerezza.rdf.utils.smushing;
-import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
-import org.apache.clerezza.rdf.core.BNode;
-import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.NonLiteral;
-import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.UriRef;
-import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
-import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.access.LockableMGraph;
import org.apache.clerezza.rdf.ontologies.OWL;
-import org.apache.clerezza.rdf.ontologies.RDF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * A utility to equate duplicate nodes in an Mgarph, currently only nodes with
- * a shared ifp are equated.
+ * A utility to equate duplicate nodes in an Mgraph. This unifies owl:sameAs
+ * resources.
*
* @author reto
*/
-public class Smusher {
+public class SameAsSmusher extends BaseSmusher {
+
+ static final Logger log = LoggerFactory.getLogger(SameAsSmusher.class);
- static final Logger log = LoggerFactory.getLogger(Smusher.class);
-
/**
- * smush mGaph given the ontological facts. Currently it does only
- * one step ifp smushin, i.e. only ifps are taken in account and only
- * nodes that have the same node as ifp object in the orignal graph are
- * equates. (calling the method a second time might lead to additional
- * smushings.)
- *
+ * This will ensure that all properties of sameAs resources are associated
+ * to the preferedIri as returned by {@code getPreferedIri}
* @param mGraph
- * @param tBox
+ * @param owlSameStatements
+ * @param addCanonicalSameAsStatements if true, owl:sameAs statements with the preferedIri as object will be added
*/
- public static void smush(MGraph mGraph, TripleCollection tBox) {
- final Set<UriRef> ifps = getIfps(tBox);
- final Map<PredicateObject, Set<NonLiteral>> ifp2nodesMap = new HashMap<PredicateObject, Set<NonLiteral>>();
- for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
- final Triple triple = it.next();
- final UriRef predicate = triple.getPredicate();
- if (!ifps.contains(predicate)) {
- continue;
- }
- final PredicateObject po = new PredicateObject(predicate, triple.getObject());
- Set<NonLiteral> equivalentNodes = ifp2nodesMap.get(po);
- if (equivalentNodes == null) {
- equivalentNodes = new HashSet<NonLiteral>();
- ifp2nodesMap.put(po, equivalentNodes);
- }
- equivalentNodes.add(triple.getSubject());
- }
- Set<Set<NonLiteral>> unitedEquivalenceSets = uniteSetsWithCommonElement(ifp2nodesMap.values());
- smush(mGraph, unitedEquivalenceSets);
- }
-
- public static void sameAsSmush(MGraph mGraph, TripleCollection owlSameStatements) {
+ public void smush(LockableMGraph mGraph,
+ TripleCollection owlSameStatements,
+ boolean addCanonicalSameAsStatements) {
log.info("Starting smushing");
@@ -96,6 +68,7 @@ public class Smusher {
throw new RuntimeException("Statements must use only <http://www.w3.org/2002/07/owl#sameAs> predicate.");
}
final NonLiteral subject = triple.getSubject();
+ //literals not yet supported
final NonLiteral object = (NonLiteral)triple.getObject();
Set<NonLiteral> equivalentNodes = node2EquivalenceSet.get(subject);
@@ -106,6 +79,18 @@ public class Smusher {
if (equivalentNodes == null) {
equivalentNodes = new HashSet<NonLiteral>();
}
+            } else {
+                // the subject already has an equivalence set; if the object has
+                // one as well, it is merged into the subject's set
+                final Set<NonLiteral> objectSet = node2EquivalenceSet.get(object);
+                // test objectSet, not object: it is null for a not yet seen object
+                if (objectSet != null) {
+                    // re-key all members of the object's set to the merged set
+                    for (NonLiteral res : objectSet) {
+                        node2EquivalenceSet.put(res, equivalentNodes);
+                    }
+                    equivalentNodes.addAll(objectSet);
+                }
}
// add both subject and object of the owl:sameAs statement to the set of equivalent uris
@@ -122,164 +107,8 @@ public class Smusher {
// This set contains the sets of equivalent uris
Set<Set<NonLiteral>> unitedEquivalenceSets = new HashSet<Set<NonLiteral>>(node2EquivalenceSet.values());
- smush(mGraph, unitedEquivalenceSets);
- }
-
- public static void smush(MGraph mGraph, Set<Set<NonLiteral>> unitedEquivalenceSets) {
- Map<NonLiteral, NonLiteral> current2ReplacementMap = new HashMap<NonLiteral, NonLiteral>();
- final MGraph owlSameAsGraph = new SimpleMGraph();
- for (Set<NonLiteral> equivalenceSet : unitedEquivalenceSets) {
- final NonLiteral replacement = getReplacementFor(equivalenceSet, owlSameAsGraph);
- for (NonLiteral current : equivalenceSet) {
- if (!current.equals(replacement)) {
- current2ReplacementMap.put(current, replacement);
- }
- }
- }
- final Set<Triple> newTriples = new HashSet<Triple>();
- for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
- final Triple triple = it.next();
- Triple replacementTriple = null;
- final NonLiteral subject = triple.getSubject();
- NonLiteral subjectReplacement =
- current2ReplacementMap.get(subject);
- final Resource object = triple.getObject();
- @SuppressWarnings("element-type-mismatch")
- Resource objectReplacement = current2ReplacementMap.get(object);
- if ((subjectReplacement != null) || (objectReplacement != null)) {
- it.remove();
- if (subjectReplacement == null) {
- subjectReplacement = subject;
- }
- if (objectReplacement == null) {
- objectReplacement = object;
- }
- newTriples.add(new TripleImpl(subjectReplacement,
- triple.getPredicate(), objectReplacement));
- }
- }
- for (Triple triple : newTriples) {
- mGraph.add(triple);
- }
- mGraph.addAll(owlSameAsGraph);
- }
-
- private static Set<UriRef> getIfps(TripleCollection tBox) {
- final Iterator<Triple> ifpDefinitions = tBox.filter(null, RDF.type,
- OWL.InverseFunctionalProperty);
- final Set<UriRef> ifps = new HashSet<UriRef>();
- while (ifpDefinitions.hasNext()) {
- final Triple triple = ifpDefinitions.next();
- ifps.add((UriRef) triple.getSubject());
- }
- return ifps;
- }
-
- private static NonLiteral getReplacementFor(Set<NonLiteral> equivalenceSet,
- MGraph owlSameAsGraph) {
- final Set<UriRef> uriRefs = new HashSet<UriRef>();
- for (NonLiteral nonLiteral : equivalenceSet) {
- if (nonLiteral instanceof UriRef) {
- uriRefs.add((UriRef) nonLiteral);
- }
- }
- switch (uriRefs.size()) {
- case 1:
- return uriRefs.iterator().next();
- case 0:
- return new BNode();
- }
- final Iterator<UriRef> uriRefIter = uriRefs.iterator();
- //instead of an arbitrary one we might either decide lexicographically
- //or look at their frequency in mGraph
- final UriRef first = uriRefIter.next();
- while (uriRefIter.hasNext()) {
- UriRef uriRef = uriRefIter.next();
- owlSameAsGraph.add(new TripleImpl(uriRef, OWL.sameAs, first));
- }
- return first;
- }
-
- private static <T> Set<Set<T>> uniteSetsWithCommonElement(
- Collection<Set<T>> originalSets) {
- Set<Set<T>> result = new HashSet<Set<T>>();
- Iterator<Set<T>> iter = originalSets.iterator();
- while (iter.hasNext()) {
- Set<T> originalSet = iter.next();
- Set<T> matchingSet = getMatchinSet(originalSet, result);
- if (matchingSet != null) {
- matchingSet.addAll(originalSet);
- } else {
- result.add(new HashSet<T>(originalSet));
- }
- }
- if (result.size() < originalSets.size()) {
- return uniteSetsWithCommonElement(result);
- } else {
- return result;
- }
+ smush(mGraph, unitedEquivalenceSets, addCanonicalSameAsStatements);
}
- private static <T> Set<T> getMatchinSet(Set<T> set, Set<Set<T>> setOfSet) {
- for (Set<T> current : setOfSet) {
- if (shareElements(set,current)) {
- return current;
- }
- }
- return null;
- }
-
- private static <T> boolean shareElements(Set<T> set1, Set<T> set2) {
- for (T elem : set2) {
- if (set1.contains(elem)) {
- return true;
- }
- }
- return false;
- }
-
- static class PredicateObject {
-
- final UriRef predicate;
- final Resource object;
-
- public PredicateObject(UriRef predicate, Resource object) {
- this.predicate = predicate;
- this.object = object;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (obj == null) {
- return false;
- }
- if (getClass() != obj.getClass()) {
- return false;
- }
- final PredicateObject other = (PredicateObject) obj;
- if (this.predicate != other.predicate && !this.predicate.equals(other.predicate)) {
- return false;
- }
- if (this.object != other.object && !this.object.equals(other.object)) {
- return false;
- }
- return true;
- }
-
- @Override
- public int hashCode() {
- int hash = 3;
- hash = 29 * hash + this.predicate.hashCode();
- hash = 13 * hash + this.object.hashCode();
- return hash;
- }
-
- @Override
- public String toString() {
- return "("+predicate+", "+object+")";
- }
-
-
- };
}