You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@clerezza.apache.org by re...@apache.org on 2013/10/15 16:19:36 UTC

svn commit: r1532350 - in /clerezza/trunk/rdf.utils/src/main: java/org/apache/clerezza/rdf/utils/ java/org/apache/clerezza/rdf/utils/smushing/ resources/

Author: reto
Date: Tue Oct 15 14:19:35 2013
New Revision: 1532350

URL: http://svn.apache.org/r1532350
Log:
CLEREZZA-823: separate smusher classes allow overwriting the getPreferedIri method

Added:
    clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/
    clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/BaseSmusher.java
    clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/IfpSmusher.java
      - copied, changed from r1532294, clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
    clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/SameAsSmusher.java
      - copied, changed from r1532294, clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
    clerezza/trunk/rdf.utils/src/main/resources/
Modified:
    clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java

Modified: clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
URL: http://svn.apache.org/viewvc/clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java?rev=1532350&r1=1532349&r2=1532350&view=diff
==============================================================================
--- clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java (original)
+++ clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java Tue Oct 15 14:19:35 2013
@@ -18,268 +18,52 @@
  */
 package org.apache.clerezza.rdf.utils;
 
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.clerezza.rdf.core.BNode;
 import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.NonLiteral;
-import org.apache.clerezza.rdf.core.Resource;
-import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.TripleCollection;
-import org.apache.clerezza.rdf.core.UriRef;
-import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
-import org.apache.clerezza.rdf.core.impl.TripleImpl;
-import org.apache.clerezza.rdf.ontologies.OWL;
-import org.apache.clerezza.rdf.ontologies.RDF;
+import org.apache.clerezza.rdf.core.access.LockableMGraph;
+import org.apache.clerezza.rdf.core.access.LockableMGraphWrapper;
+import org.apache.clerezza.rdf.utils.smushing.IfpSmusher;
+import org.apache.clerezza.rdf.utils.smushing.SameAsSmusher;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * A utility to equate duplicate nodes in an Mgarph, currently only nodes with 
- * a shared ifp are equated.
- *
+ * A utility to smush equivalent resources. For greater flexibility use the 
+ * classes in the smushing package.
+ * 
  * @author reto
  */
 public class Smusher {
-    
+
     static final Logger log = LoggerFactory.getLogger(Smusher.class);
 
     /**
-     * smush mGaph given the ontological facts. Currently it does only
-     * one step ifp smushin, i.e. only ifps are taken in account and only
-     * nodes that have the same node as ifp object in the orignal graph are
-     * equates. (calling the method a second time might lead to additional
-     * smushings.)
+     * Smushes mGraph given the ontological facts. Currently it does only one step
+     * of ifp smushing, i.e. only ifps are taken into account and only nodes that
+     * have the same node as ifp object in the original graph are equated. (Calling
+     * the method a second time might lead to additional smushings.)
      *
      * @param mGraph
      * @param tBox
      */
     public static void smush(MGraph mGraph, TripleCollection tBox) {
-        final Set<UriRef> ifps = getIfps(tBox);
-        final Map<PredicateObject, Set<NonLiteral>> ifp2nodesMap = new HashMap<PredicateObject, Set<NonLiteral>>();
-        for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
-            final Triple triple = it.next();
-            final UriRef predicate = triple.getPredicate();
-            if (!ifps.contains(predicate)) {
-                continue;
-            }
-            final PredicateObject po = new PredicateObject(predicate, triple.getObject());
-            Set<NonLiteral> equivalentNodes = ifp2nodesMap.get(po);
-            if (equivalentNodes == null) {
-                equivalentNodes = new HashSet<NonLiteral>();
-                ifp2nodesMap.put(po, equivalentNodes);
-            }
-            equivalentNodes.add(triple.getSubject());
-        }
-        Set<Set<NonLiteral>> unitedEquivalenceSets = uniteSetsWithCommonElement(ifp2nodesMap.values());
-        smush(mGraph, unitedEquivalenceSets);
+        smush(lockable(mGraph), tBox);
     }
-    
+
     public static void sameAsSmush(MGraph mGraph, TripleCollection owlSameStatements) {
-    	
-    	log.info("Starting smushing");
-        
-    	// This hashmap contains a uri (key) and the set of equivalent uris (value)
-    	final Map<NonLiteral, Set<NonLiteral>> node2EquivalenceSet = new HashMap<NonLiteral, Set<NonLiteral>>();
-    	
-    	log.info("Creating the sets of equivalent uris of each subject or object in the owl:sameAs statements");
-    	// Determines for each subject and object in all the owl:sameAs statements the set of ewquivalent uris 
-    	for (Iterator<Triple> it = owlSameStatements.iterator(); it.hasNext();) {            
-    		final Triple triple = it.next();
-            final UriRef predicate = triple.getPredicate();
-            if (!predicate.equals(OWL.sameAs)) {
-                throw new RuntimeException("Statements must use only <http://www.w3.org/2002/07/owl#sameAs> predicate.");
-            }
-            final NonLiteral subject = triple.getSubject();
-            final NonLiteral object = (NonLiteral)triple.getObject();
-            
-            Set<NonLiteral> equivalentNodes = node2EquivalenceSet.get(subject);
-            
-            // if there is not a set of equivalent uris then create a new set
-            if (equivalentNodes == null) {
-            	equivalentNodes = node2EquivalenceSet.get(object);
-            	if (equivalentNodes == null) {
-                    equivalentNodes = new HashSet<NonLiteral>();
-                }
-            }
-            
-            // add both subject and object of the owl:sameAs statement to the set of equivalent uris
-            equivalentNodes.add(subject);
-            equivalentNodes.add(object);
-            
-            // use both uris in the owl:sameAs statement as keys for the set of equivalent uris
-            node2EquivalenceSet.put(subject, equivalentNodes);
-            node2EquivalenceSet.put(object, equivalentNodes);
-            
-            log.info("Sets of equivalent uris created.");
-        
-    	}
-    	
-    	// This set contains the sets of equivalent uris
-    	Set<Set<NonLiteral>> unitedEquivalenceSets = new HashSet<Set<NonLiteral>>(node2EquivalenceSet.values());
-        smush(mGraph, unitedEquivalenceSets);
+        sameAsSmush(lockable(mGraph), owlSameStatements);
     }
     
-    public static void smush(MGraph mGraph, Set<Set<NonLiteral>> unitedEquivalenceSets) {
-        Map<NonLiteral, NonLiteral> current2ReplacementMap = new HashMap<NonLiteral, NonLiteral>();
-        final MGraph owlSameAsGraph = new SimpleMGraph();
-        for (Set<NonLiteral> equivalenceSet : unitedEquivalenceSets) {
-            final NonLiteral replacement = getReplacementFor(equivalenceSet, owlSameAsGraph);
-            for (NonLiteral current : equivalenceSet) {
-                if (!current.equals(replacement)) {
-                    current2ReplacementMap.put(current, replacement);
-                }
-            }
-        }
-        final Set<Triple> newTriples = new HashSet<Triple>();
-        for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
-            final Triple triple = it.next();
-            Triple replacementTriple = null;
-            final NonLiteral subject = triple.getSubject();
-            NonLiteral subjectReplacement =
-                    current2ReplacementMap.get(subject);
-            final Resource object = triple.getObject();
-            @SuppressWarnings("element-type-mismatch")
-            Resource objectReplacement = current2ReplacementMap.get(object);
-            if ((subjectReplacement != null) || (objectReplacement != null)) {
-                it.remove();
-                if (subjectReplacement == null) {
-                    subjectReplacement = subject;
-                }
-                if (objectReplacement == null) {
-                    objectReplacement = object;
-                }
-                newTriples.add(new TripleImpl(subjectReplacement,
-                        triple.getPredicate(), objectReplacement));
-            }
-        }
-        for (Triple triple : newTriples) {
-            mGraph.add(triple);
-        }
-        mGraph.addAll(owlSameAsGraph);
-    }
-
-    private static Set<UriRef> getIfps(TripleCollection tBox) {
-        final Iterator<Triple> ifpDefinitions = tBox.filter(null, RDF.type,
-                OWL.InverseFunctionalProperty);
-        final Set<UriRef> ifps = new HashSet<UriRef>();
-        while (ifpDefinitions.hasNext()) {
-            final Triple triple = ifpDefinitions.next();
-            ifps.add((UriRef) triple.getSubject());
-        }
-        return ifps;
-    }
-
-    private static NonLiteral getReplacementFor(Set<NonLiteral> equivalenceSet, 
-            MGraph owlSameAsGraph) {
-        final Set<UriRef> uriRefs = new HashSet<UriRef>();
-        for (NonLiteral nonLiteral : equivalenceSet) {
-            if (nonLiteral instanceof UriRef) {
-                uriRefs.add((UriRef) nonLiteral);
-            }
-        }
-        switch (uriRefs.size()) {
-            case 1:
-                return uriRefs.iterator().next();
-            case 0:
-                return new BNode();
-        }
-        final Iterator<UriRef> uriRefIter = uriRefs.iterator();
-        //instead of an arbitrary one we might either decide lexicographically
-        //or look at their frequency in mGraph
-        final UriRef first = uriRefIter.next();
-        while (uriRefIter.hasNext()) {
-            UriRef uriRef = uriRefIter.next();
-            owlSameAsGraph.add(new TripleImpl(uriRef, OWL.sameAs, first));
-        }
-        return first;
-    }
-
-    private static <T> Set<Set<T>> uniteSetsWithCommonElement(
-            Collection<Set<T>> originalSets) {
-        Set<Set<T>> result = new HashSet<Set<T>>();
-        Iterator<Set<T>> iter = originalSets.iterator();
-        while (iter.hasNext()) {
-            Set<T> originalSet = iter.next();
-            Set<T> matchingSet = getMatchinSet(originalSet, result);
-            if (matchingSet != null) {
-                matchingSet.addAll(originalSet);
-            } else {
-                result.add(new HashSet<T>(originalSet));
-            }
-        }
-        if (result.size() < originalSets.size()) {
-            return uniteSetsWithCommonElement(result);
-        } else {
-            return result;
-        }
+    public static void smush(LockableMGraph mGraph, TripleCollection tBox) {
+        new IfpSmusher().smush(mGraph, tBox);
     }
 
-    private static <T> Set<T> getMatchinSet(Set<T> set, Set<Set<T>> setOfSet) {
-        for (Set<T> current : setOfSet) {
-            if (shareElements(set,current)) {
-                return current;
-            }
-        }
-        return null;
+    public static void sameAsSmush(LockableMGraph mGraph, TripleCollection owlSameStatements) {
+        new SameAsSmusher().smush(mGraph, owlSameStatements, true);
     }
 
-    private static <T> boolean shareElements(Set<T> set1, Set<T> set2) {
-        for (T elem : set2) {
-            if (set1.contains(elem)) {
-                return true;
-            }
-        }
-        return false;
+    /**
+     * Provides a LockableMGraph for the given graph.
+     *
+     * @param mGraph the graph to access as a LockableMGraph
+     * @return mGraph itself if it already is a LockableMGraph, otherwise a
+     *         new LockableMGraphWrapper created around it
+     */
+    private static LockableMGraph lockable(MGraph mGraph) {
+        if (mGraph instanceof LockableMGraph) {
+            return (LockableMGraph) mGraph;
+        }
+        return new LockableMGraphWrapper(mGraph);
     }
-    
-
-    static class PredicateObject {
-
-        final UriRef predicate;
-        final Resource object;
-
-        public PredicateObject(UriRef predicate, Resource object) {
-            this.predicate = predicate;
-            this.object = object;
-        }
-
-        @Override
-        public boolean equals(Object obj) {
-            if (obj == null) {
-                return false;
-            }
-            if (getClass() != obj.getClass()) {
-                return false;
-            }
-            final PredicateObject other = (PredicateObject) obj;
-            if (this.predicate != other.predicate && !this.predicate.equals(other.predicate)) {
-                return false;
-            }
-            if (this.object != other.object && !this.object.equals(other.object)) {
-                return false;
-            }
-            return true;
-        }
-
-        @Override
-        public int hashCode() {
-            int hash = 3;
-            hash = 29 * hash + this.predicate.hashCode();
-            hash = 13 * hash + this.object.hashCode();
-            return hash;
-        }
-
-        @Override
-        public String toString() {
-            return "("+predicate+", "+object+")";
-        }
-
-
-    };
 }

Added: clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/BaseSmusher.java
URL: http://svn.apache.org/viewvc/clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/BaseSmusher.java?rev=1532350&view=auto
==============================================================================
--- clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/BaseSmusher.java (added)
+++ clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/BaseSmusher.java Tue Oct 15 14:19:35 2013
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.clerezza.rdf.utils.smushing;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.locks.Lock;
+import org.apache.clerezza.rdf.core.BNode;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.access.LockableMGraph;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.ontologies.OWL;
+
+/**
+ * Base class for smushers: replaces all resources of an equivalence set by a
+ * single resource. Subclasses can override {@link #getPreferedIri(Set)} to
+ * influence which IRI is kept when a set contains several IRIs.
+ *
+ * @author Reto
+ */
+public class BaseSmusher {
+
+    /**
+     * Smushes the resources in mGraph that belong to the same set in equivalenceSets,
+     * i.e. it adds all properties to one of the resources in the equivalence set.
+     * 
+     * Optionally owl:sameAs statements are added that point from the IRIs that 
+     * no longer have properties to the one with properties. If addOwlSameAs
+     * is false the IRIs will just disappear from the graph.
+     * 
+     * The graph is modified while holding its write lock.
+     * 
+     * @param mGraph the graph to smush
+     * @param equivalenceSets sets of equivalent resources
+     * @param addOwlSameAs whether owl:sameAs statements should be added
+     */
+    public void smush(LockableMGraph mGraph, Set<Set<NonLiteral>> equivalenceSets, boolean addOwlSameAs) {
+        final Map<NonLiteral, NonLiteral> current2ReplacementMap = new HashMap<NonLiteral, NonLiteral>();
+        // collects the owl:sameAs statements pointing to the chosen replacements
+        final MGraph owlSameAsGraph = new SimpleMGraph();
+        for (Set<NonLiteral> equivalenceSet : equivalenceSets) {
+            final NonLiteral replacement = getReplacementFor(equivalenceSet, owlSameAsGraph);
+            for (NonLiteral current : equivalenceSet) {
+                if (!current.equals(replacement)) {
+                    current2ReplacementMap.put(current, replacement);
+                }
+            }
+        }
+        final Set<Triple> newTriples = new HashSet<Triple>();
+        final Lock l = mGraph.getLock().writeLock();
+        l.lock();
+        try {
+            for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
+                final Triple triple = it.next();
+                final NonLiteral subject = triple.getSubject();
+                NonLiteral subjectReplacement = current2ReplacementMap.get(subject);
+                final Resource object = triple.getObject();
+                @SuppressWarnings(value = "element-type-mismatch")
+                Resource objectReplacement = current2ReplacementMap.get(object);
+                if ((subjectReplacement != null) || (objectReplacement != null)) {
+                    // the rewritten triple is added after the iteration so that
+                    // the graph is only changed through the iterator while iterating
+                    it.remove();
+                    if (subjectReplacement == null) {
+                        subjectReplacement = subject;
+                    }
+                    if (objectReplacement == null) {
+                        objectReplacement = object;
+                    }
+                    newTriples.add(new TripleImpl(subjectReplacement, triple.getPredicate(), objectReplacement));
+                }
+            }
+            for (Triple triple : newTriples) {
+                mGraph.add(triple);
+            }
+            // honour the flag: previously the owl:sameAs statements were added
+            // unconditionally, contradicting the documented contract
+            if (addOwlSameAs) {
+                mGraph.addAll(owlSameAsGraph);
+            }
+        } finally {
+            l.unlock();
+        }
+    }
+    
+    /**
+     * Determines the resource that replaces all members of an equivalence set.
+     * If the set contains no IRI a new BNode is returned, if it contains exactly
+     * one IRI that IRI is returned. Otherwise the IRI returned by
+     * {@link #getPreferedIri(Set)} is used and for every other IRI of the set
+     * an owl:sameAs statement pointing to it is added to owlSameAsGraph.
+     * 
+     * @param equivalenceSet the resources considered equivalent
+     * @param owlSameAsGraph the graph collecting the owl:sameAs statements
+     * @return the resource to use in place of the members of the set
+     */
+    private NonLiteral getReplacementFor(Set<NonLiteral> equivalenceSet, 
+            MGraph owlSameAsGraph) {
+        final Set<UriRef> uriRefs = new HashSet<UriRef>();
+        for (NonLiteral nonLiteral : equivalenceSet) {
+            if (nonLiteral instanceof UriRef) {
+                uriRefs.add((UriRef) nonLiteral);
+            }
+        }
+        switch (uriRefs.size()) {
+            case 1:
+                return uriRefs.iterator().next();
+            case 0:
+                return new BNode();
+        }
+        final UriRef preferedIri = getPreferedIri(uriRefs);
+        final Iterator<UriRef> uriRefIter = uriRefs.iterator();
+        while (uriRefIter.hasNext()) {
+            UriRef uriRef = uriRefIter.next();
+            if (!uriRef.equals(preferedIri)) {
+                owlSameAsGraph.add(new TripleImpl(uriRef, OWL.sameAs, preferedIri));
+            }
+        }
+        return preferedIri;
+    }
+
+    
+    /**
+     * Returns a prefered IRI for the IRIs in a set. Typically and in the
+     * default implementation the IRI will be one of the set. Note however that 
+     * subclass implementations may also return another IRI to be used.
+     * 
+     * The default implementation returns the first IRI delivered by the
+     * iterator of the set.
+     * 
+     * @param uriRefs the IRIs to choose from; smush only calls this method
+     *        for sets with at least two IRIs
+     * @return the IRI that replaces the IRIs in the set
+     */
+    protected UriRef getPreferedIri(Set<UriRef> uriRefs) {
+        final Iterator<UriRef> uriRefIter = uriRefs.iterator();
+        //instead of an arbitrary one we might either decide lexicographically
+        //or look at their frequency in mGraph
+        return uriRefIter.next();
+    }
+    
+}

Copied: clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/IfpSmusher.java (from r1532294, clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java)
URL: http://svn.apache.org/viewvc/clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/IfpSmusher.java?p2=clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/IfpSmusher.java&p1=clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java&r1=1532294&r2=1532350&rev=1532350&view=diff
==============================================================================
--- clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java (original)
+++ clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/IfpSmusher.java Tue Oct 15 14:19:35 2013
@@ -16,8 +16,9 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.clerezza.rdf.utils;
+package org.apache.clerezza.rdf.utils.smushing;
 
+import org.apache.clerezza.rdf.utils.*;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -32,6 +33,7 @@ import org.apache.clerezza.rdf.core.Reso
 import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.TripleCollection;
 import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.access.LockableMGraph;
 import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.clerezza.rdf.ontologies.OWL;
@@ -45,9 +47,9 @@ import org.slf4j.LoggerFactory;
  *
  * @author reto
  */
-public class Smusher {
+public class IfpSmusher extends BaseSmusher {
     
-    static final Logger log = LoggerFactory.getLogger(Smusher.class);
+    static final Logger log = LoggerFactory.getLogger(IfpSmusher.class);
 
     /**
      * smush mGaph given the ontological facts. Currently it does only
@@ -59,7 +61,7 @@ public class Smusher {
      * @param mGraph
      * @param tBox
      */
-    public static void smush(MGraph mGraph, TripleCollection tBox) {
+    public void smush(LockableMGraph mGraph, TripleCollection tBox) {
         final Set<UriRef> ifps = getIfps(tBox);
         final Map<PredicateObject, Set<NonLiteral>> ifp2nodesMap = new HashMap<PredicateObject, Set<NonLiteral>>();
         for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
@@ -77,94 +79,11 @@ public class Smusher {
             equivalentNodes.add(triple.getSubject());
         }
         Set<Set<NonLiteral>> unitedEquivalenceSets = uniteSetsWithCommonElement(ifp2nodesMap.values());
-        smush(mGraph, unitedEquivalenceSets);
+        smush(mGraph, unitedEquivalenceSets, true);
     }
     
-    public static void sameAsSmush(MGraph mGraph, TripleCollection owlSameStatements) {
-    	
-    	log.info("Starting smushing");
-        
-    	// This hashmap contains a uri (key) and the set of equivalent uris (value)
-    	final Map<NonLiteral, Set<NonLiteral>> node2EquivalenceSet = new HashMap<NonLiteral, Set<NonLiteral>>();
-    	
-    	log.info("Creating the sets of equivalent uris of each subject or object in the owl:sameAs statements");
-    	// Determines for each subject and object in all the owl:sameAs statements the set of ewquivalent uris 
-    	for (Iterator<Triple> it = owlSameStatements.iterator(); it.hasNext();) {            
-    		final Triple triple = it.next();
-            final UriRef predicate = triple.getPredicate();
-            if (!predicate.equals(OWL.sameAs)) {
-                throw new RuntimeException("Statements must use only <http://www.w3.org/2002/07/owl#sameAs> predicate.");
-            }
-            final NonLiteral subject = triple.getSubject();
-            final NonLiteral object = (NonLiteral)triple.getObject();
-            
-            Set<NonLiteral> equivalentNodes = node2EquivalenceSet.get(subject);
-            
-            // if there is not a set of equivalent uris then create a new set
-            if (equivalentNodes == null) {
-            	equivalentNodes = node2EquivalenceSet.get(object);
-            	if (equivalentNodes == null) {
-                    equivalentNodes = new HashSet<NonLiteral>();
-                }
-            }
-            
-            // add both subject and object of the owl:sameAs statement to the set of equivalent uris
-            equivalentNodes.add(subject);
-            equivalentNodes.add(object);
-            
-            // use both uris in the owl:sameAs statement as keys for the set of equivalent uris
-            node2EquivalenceSet.put(subject, equivalentNodes);
-            node2EquivalenceSet.put(object, equivalentNodes);
-            
-            log.info("Sets of equivalent uris created.");
-        
-    	}
-    	
-    	// This set contains the sets of equivalent uris
-    	Set<Set<NonLiteral>> unitedEquivalenceSets = new HashSet<Set<NonLiteral>>(node2EquivalenceSet.values());
-        smush(mGraph, unitedEquivalenceSets);
-    }
-    
-    public static void smush(MGraph mGraph, Set<Set<NonLiteral>> unitedEquivalenceSets) {
-        Map<NonLiteral, NonLiteral> current2ReplacementMap = new HashMap<NonLiteral, NonLiteral>();
-        final MGraph owlSameAsGraph = new SimpleMGraph();
-        for (Set<NonLiteral> equivalenceSet : unitedEquivalenceSets) {
-            final NonLiteral replacement = getReplacementFor(equivalenceSet, owlSameAsGraph);
-            for (NonLiteral current : equivalenceSet) {
-                if (!current.equals(replacement)) {
-                    current2ReplacementMap.put(current, replacement);
-                }
-            }
-        }
-        final Set<Triple> newTriples = new HashSet<Triple>();
-        for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
-            final Triple triple = it.next();
-            Triple replacementTriple = null;
-            final NonLiteral subject = triple.getSubject();
-            NonLiteral subjectReplacement =
-                    current2ReplacementMap.get(subject);
-            final Resource object = triple.getObject();
-            @SuppressWarnings("element-type-mismatch")
-            Resource objectReplacement = current2ReplacementMap.get(object);
-            if ((subjectReplacement != null) || (objectReplacement != null)) {
-                it.remove();
-                if (subjectReplacement == null) {
-                    subjectReplacement = subject;
-                }
-                if (objectReplacement == null) {
-                    objectReplacement = object;
-                }
-                newTriples.add(new TripleImpl(subjectReplacement,
-                        triple.getPredicate(), objectReplacement));
-            }
-        }
-        for (Triple triple : newTriples) {
-            mGraph.add(triple);
-        }
-        mGraph.addAll(owlSameAsGraph);
-    }
 
-    private static Set<UriRef> getIfps(TripleCollection tBox) {
+    private Set<UriRef> getIfps(TripleCollection tBox) {
         final Iterator<Triple> ifpDefinitions = tBox.filter(null, RDF.type,
                 OWL.InverseFunctionalProperty);
         final Set<UriRef> ifps = new HashSet<UriRef>();
@@ -175,37 +94,13 @@ public class Smusher {
         return ifps;
     }
 
-    private static NonLiteral getReplacementFor(Set<NonLiteral> equivalenceSet, 
-            MGraph owlSameAsGraph) {
-        final Set<UriRef> uriRefs = new HashSet<UriRef>();
-        for (NonLiteral nonLiteral : equivalenceSet) {
-            if (nonLiteral instanceof UriRef) {
-                uriRefs.add((UriRef) nonLiteral);
-            }
-        }
-        switch (uriRefs.size()) {
-            case 1:
-                return uriRefs.iterator().next();
-            case 0:
-                return new BNode();
-        }
-        final Iterator<UriRef> uriRefIter = uriRefs.iterator();
-        //instead of an arbitrary one we might either decide lexicographically
-        //or look at their frequency in mGraph
-        final UriRef first = uriRefIter.next();
-        while (uriRefIter.hasNext()) {
-            UriRef uriRef = uriRefIter.next();
-            owlSameAsGraph.add(new TripleImpl(uriRef, OWL.sameAs, first));
-        }
-        return first;
-    }
-
-    private static <T> Set<Set<T>> uniteSetsWithCommonElement(
+    private <T> Set<Set<T>> uniteSetsWithCommonElement(
             Collection<Set<T>> originalSets) {
         Set<Set<T>> result = new HashSet<Set<T>>();
         Iterator<Set<T>> iter = originalSets.iterator();
         while (iter.hasNext()) {
             Set<T> originalSet = iter.next();
+            //TODO this could be done more efficiently with a map
             Set<T> matchingSet = getMatchinSet(originalSet, result);
             if (matchingSet != null) {
                 matchingSet.addAll(originalSet);
@@ -220,7 +115,7 @@ public class Smusher {
         }
     }
 
-    private static <T> Set<T> getMatchinSet(Set<T> set, Set<Set<T>> setOfSet) {
+    private <T> Set<T> getMatchinSet(Set<T> set, Set<Set<T>> setOfSet) {
         for (Set<T> current : setOfSet) {
             if (shareElements(set,current)) {
                 return current;
@@ -229,7 +124,7 @@ public class Smusher {
         return null;
     }
 
-    private static <T> boolean shareElements(Set<T> set1, Set<T> set2) {
+    private <T> boolean shareElements(Set<T> set1, Set<T> set2) {
         for (T elem : set2) {
             if (set1.contains(elem)) {
                 return true;
@@ -239,7 +134,7 @@ public class Smusher {
     }
     
 
-    static class PredicateObject {
+    class PredicateObject {
 
         final UriRef predicate;
         final Resource object;

Copied: clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/SameAsSmusher.java (from r1532294, clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java)
URL: http://svn.apache.org/viewvc/clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/SameAsSmusher.java?p2=clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/SameAsSmusher.java&p1=clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java&r1=1532294&r2=1532350&rev=1532350&view=diff
==============================================================================
--- clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java (original)
+++ clerezza/trunk/rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/smushing/SameAsSmusher.java Tue Oct 15 14:19:35 2013
@@ -16,71 +16,43 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.clerezza.rdf.utils;
+package org.apache.clerezza.rdf.utils.smushing;
 
-import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.clerezza.rdf.core.BNode;
-import org.apache.clerezza.rdf.core.MGraph;
 import org.apache.clerezza.rdf.core.NonLiteral;
-import org.apache.clerezza.rdf.core.Resource;
 import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.TripleCollection;
 import org.apache.clerezza.rdf.core.UriRef;
-import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
-import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.access.LockableMGraph;
 import org.apache.clerezza.rdf.ontologies.OWL;
-import org.apache.clerezza.rdf.ontologies.RDF;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * A utility to equate duplicate nodes in an Mgarph, currently only nodes with 
- * a shared ifp are equated.
+ * A utility to equate duplicate nodes in an Mgraph. This unifies owl:sameAs
+ * resources.
  *
  * @author reto
  */
-public class Smusher {
+public class SameAsSmusher extends BaseSmusher {
+    
+    static final Logger log = LoggerFactory.getLogger(SameAsSmusher.class);
     
-    static final Logger log = LoggerFactory.getLogger(Smusher.class);
-
     /**
-     * smush mGaph given the ontological facts. Currently it does only
-     * one step ifp smushin, i.e. only ifps are taken in account and only
-     * nodes that have the same node as ifp object in the orignal graph are
-     * equates. (calling the method a second time might lead to additional
-     * smushings.)
-     *
+     * This will ensure that all properties of sameAs resources are associated
+     * to the preferedIri as returned by {@code getPreferedIri}
      * @param mGraph
-     * @param tBox
+     * @param owlSameStatements 
+     * @param addCanonicalSameAsStatements if true owl:sameAs statements with the preferedIri as object will be added
      */
-    public static void smush(MGraph mGraph, TripleCollection tBox) {
-        final Set<UriRef> ifps = getIfps(tBox);
-        final Map<PredicateObject, Set<NonLiteral>> ifp2nodesMap = new HashMap<PredicateObject, Set<NonLiteral>>();
-        for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
-            final Triple triple = it.next();
-            final UriRef predicate = triple.getPredicate();
-            if (!ifps.contains(predicate)) {
-                continue;
-            }
-            final PredicateObject po = new PredicateObject(predicate, triple.getObject());
-            Set<NonLiteral> equivalentNodes = ifp2nodesMap.get(po);
-            if (equivalentNodes == null) {
-                equivalentNodes = new HashSet<NonLiteral>();
-                ifp2nodesMap.put(po, equivalentNodes);
-            }
-            equivalentNodes.add(triple.getSubject());
-        }
-        Set<Set<NonLiteral>> unitedEquivalenceSets = uniteSetsWithCommonElement(ifp2nodesMap.values());
-        smush(mGraph, unitedEquivalenceSets);
-    }
-    
-    public static void sameAsSmush(MGraph mGraph, TripleCollection owlSameStatements) {
+    public void smush(LockableMGraph mGraph, 
+            TripleCollection owlSameStatements,
+            boolean addCanonicalSameAsStatements) {
     	
     	log.info("Starting smushing");
         
@@ -96,6 +68,7 @@ public class Smusher {
                 throw new RuntimeException("Statements must use only <http://www.w3.org/2002/07/owl#sameAs> predicate.");
             }
             final NonLiteral subject = triple.getSubject();
+            //literals not yet supported
             final NonLiteral object = (NonLiteral)triple.getObject();
             
             Set<NonLiteral> equivalentNodes = node2EquivalenceSet.get(subject);
@@ -106,6 +79,18 @@ public class Smusher {
             	if (equivalentNodes == null) {
                     equivalentNodes = new HashSet<NonLiteral>();
                 }
+            } else {
+                Set<NonLiteral> objectSet = node2EquivalenceSet.get(object);
+                if (object != null) {
+                    //merge two sets
+                    for (NonLiteral res : objectSet) {
+                        node2EquivalenceSet.remove(res);
+                    }
+                    for (NonLiteral res : objectSet) {
+                        node2EquivalenceSet.put(res,equivalentNodes);
+                    }
+                    equivalentNodes.addAll(objectSet);
+                }
             }
             
             // add both subject and object of the owl:sameAs statement to the set of equivalent uris
@@ -122,164 +107,8 @@ public class Smusher {
     	
     	// This set contains the sets of equivalent uris
     	Set<Set<NonLiteral>> unitedEquivalenceSets = new HashSet<Set<NonLiteral>>(node2EquivalenceSet.values());
-        smush(mGraph, unitedEquivalenceSets);
-    }
-    
-    public static void smush(MGraph mGraph, Set<Set<NonLiteral>> unitedEquivalenceSets) {
-        Map<NonLiteral, NonLiteral> current2ReplacementMap = new HashMap<NonLiteral, NonLiteral>();
-        final MGraph owlSameAsGraph = new SimpleMGraph();
-        for (Set<NonLiteral> equivalenceSet : unitedEquivalenceSets) {
-            final NonLiteral replacement = getReplacementFor(equivalenceSet, owlSameAsGraph);
-            for (NonLiteral current : equivalenceSet) {
-                if (!current.equals(replacement)) {
-                    current2ReplacementMap.put(current, replacement);
-                }
-            }
-        }
-        final Set<Triple> newTriples = new HashSet<Triple>();
-        for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
-            final Triple triple = it.next();
-            Triple replacementTriple = null;
-            final NonLiteral subject = triple.getSubject();
-            NonLiteral subjectReplacement =
-                    current2ReplacementMap.get(subject);
-            final Resource object = triple.getObject();
-            @SuppressWarnings("element-type-mismatch")
-            Resource objectReplacement = current2ReplacementMap.get(object);
-            if ((subjectReplacement != null) || (objectReplacement != null)) {
-                it.remove();
-                if (subjectReplacement == null) {
-                    subjectReplacement = subject;
-                }
-                if (objectReplacement == null) {
-                    objectReplacement = object;
-                }
-                newTriples.add(new TripleImpl(subjectReplacement,
-                        triple.getPredicate(), objectReplacement));
-            }
-        }
-        for (Triple triple : newTriples) {
-            mGraph.add(triple);
-        }
-        mGraph.addAll(owlSameAsGraph);
-    }
-
-    private static Set<UriRef> getIfps(TripleCollection tBox) {
-        final Iterator<Triple> ifpDefinitions = tBox.filter(null, RDF.type,
-                OWL.InverseFunctionalProperty);
-        final Set<UriRef> ifps = new HashSet<UriRef>();
-        while (ifpDefinitions.hasNext()) {
-            final Triple triple = ifpDefinitions.next();
-            ifps.add((UriRef) triple.getSubject());
-        }
-        return ifps;
-    }
-
-    private static NonLiteral getReplacementFor(Set<NonLiteral> equivalenceSet, 
-            MGraph owlSameAsGraph) {
-        final Set<UriRef> uriRefs = new HashSet<UriRef>();
-        for (NonLiteral nonLiteral : equivalenceSet) {
-            if (nonLiteral instanceof UriRef) {
-                uriRefs.add((UriRef) nonLiteral);
-            }
-        }
-        switch (uriRefs.size()) {
-            case 1:
-                return uriRefs.iterator().next();
-            case 0:
-                return new BNode();
-        }
-        final Iterator<UriRef> uriRefIter = uriRefs.iterator();
-        //instead of an arbitrary one we might either decide lexicographically
-        //or look at their frequency in mGraph
-        final UriRef first = uriRefIter.next();
-        while (uriRefIter.hasNext()) {
-            UriRef uriRef = uriRefIter.next();
-            owlSameAsGraph.add(new TripleImpl(uriRef, OWL.sameAs, first));
-        }
-        return first;
-    }
-
-    private static <T> Set<Set<T>> uniteSetsWithCommonElement(
-            Collection<Set<T>> originalSets) {
-        Set<Set<T>> result = new HashSet<Set<T>>();
-        Iterator<Set<T>> iter = originalSets.iterator();
-        while (iter.hasNext()) {
-            Set<T> originalSet = iter.next();
-            Set<T> matchingSet = getMatchinSet(originalSet, result);
-            if (matchingSet != null) {
-                matchingSet.addAll(originalSet);
-            } else {
-                result.add(new HashSet<T>(originalSet));
-            }
-        }
-        if (result.size() < originalSets.size()) {
-            return uniteSetsWithCommonElement(result);
-        } else {
-            return result;
-        }
+        smush(mGraph, unitedEquivalenceSets, addCanonicalSameAsStatements);
     }
 
-    private static <T> Set<T> getMatchinSet(Set<T> set, Set<Set<T>> setOfSet) {
-        for (Set<T> current : setOfSet) {
-            if (shareElements(set,current)) {
-                return current;
-            }
-        }
-        return null;
-    }
-
-    private static <T> boolean shareElements(Set<T> set1, Set<T> set2) {
-        for (T elem : set2) {
-            if (set1.contains(elem)) {
-                return true;
-            }
-        }
-        return false;
-    }
     
-
-    static class PredicateObject {
-
-        final UriRef predicate;
-        final Resource object;
-
-        public PredicateObject(UriRef predicate, Resource object) {
-            this.predicate = predicate;
-            this.object = object;
-        }
-
-        @Override
-        public boolean equals(Object obj) {
-            if (obj == null) {
-                return false;
-            }
-            if (getClass() != obj.getClass()) {
-                return false;
-            }
-            final PredicateObject other = (PredicateObject) obj;
-            if (this.predicate != other.predicate && !this.predicate.equals(other.predicate)) {
-                return false;
-            }
-            if (this.object != other.object && !this.object.equals(other.object)) {
-                return false;
-            }
-            return true;
-        }
-
-        @Override
-        public int hashCode() {
-            int hash = 3;
-            hash = 29 * hash + this.predicate.hashCode();
-            hash = 13 * hash + this.object.hashCode();
-            return hash;
-        }
-
-        @Override
-        public String toString() {
-            return "("+predicate+", "+object+")";
-        }
-
-
-    };
 }