You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@clerezza.apache.org by ha...@apache.org on 2010/12/06 12:01:52 UTC
svn commit: r1042574 - in
/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.stable.serializer/src:
main/java/org/apache/clerezza/rdf/stable/serializer/
test/java/org/apache/clerezza/rdf/stable/serializer/
Author: hasan
Date: Mon Dec 6 11:01:51 2010
New Revision: 1042574
URL: http://svn.apache.org/viewvc?rev=1042574&view=rev
Log:
CLEREZZA-345: applied and reviewed patch by Daniel Spicar, and improved code
Modified:
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProvider.java
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/DocumentationStabilityTest.java
Modified: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProvider.java
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProvider.java?rev=1042574&r1=1042573&r2=1042574&view=diff
==============================================================================
--- incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProvider.java (original)
+++ incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProvider.java Mon Dec 6 11:01:51 2010
@@ -25,16 +25,18 @@ import java.io.OutputStream;
import java.io.StringReader;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import java.util.Vector;
import org.apache.clerezza.rdf.core.BNode;
import org.apache.clerezza.rdf.core.Graph;
import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.UriRef;
@@ -67,8 +69,8 @@ import org.slf4j.LoggerFactory;
* to provide similar results when serializing graphs. Specifically it tries to
* label blank nodes deterministically with reasonable complexity.
*
- * This serilaizer does not guarantee a deterministic result but it may minimze
- * the ammount of modified lines in serilaized output.
+ * This serializer does not guarantee a deterministic result but it may minimize
+ * the amount of modified lines in serialized output.
*
* @author Daniel Spicar (daniel.spicar@access.uzh.ch)
*/
@@ -77,7 +79,7 @@ import org.slf4j.LoggerFactory;
@SupportedFormat({SupportedFormat.N_TRIPLE})
public class StableSerializerProvider implements SerializingProvider {
- @Property(description="Specifies maximum ammount of blank node " +
+ @Property(description="Specifies maximum amount of blank nodes " +
"labeling recursions, may increase performance at the expense of stability " +
"(0 = no limit).", intValue=0)
public static final String MAX_LABELING_ITERATIONS = "max_labeling_iterations";
@@ -97,7 +99,7 @@ public class StableSerializerProvider im
String formatIdentifier) {
try {
- List<String> lines = new Vector<String>();
+ List<String> lines = new LinkedList<String>();
List<MSG> msgs = decomposeGraphToMSGs(tc);
NTriplesSerializer serializer = new NTriplesSerializer();
@@ -127,7 +129,7 @@ public class StableSerializerProvider im
TripleCollection tmp = new SimpleMGraph();
tmp.addAll(tc);
- List<MSG> msgSet = new Vector<MSG>();
+ List<MSG> msgSet = new LinkedList<MSG>();
while (tmp.size() > 0) {
Triple triple = tmp.iterator().next();
@@ -147,43 +149,39 @@ public class StableSerializerProvider im
boolean containsBNode = false;
- if (triple.getSubject() instanceof BNode) {
+ Resource resource = triple.getSubject();
+ if (resource instanceof BNode) {
containsBNode = true;
- GraphNode gn = new GraphNode(triple.getSubject(), tc);
+ } else {
+ resource = triple.getObject();
+ if (resource instanceof BNode) {
+ containsBNode = true;
+ }
+ }
+ if (containsBNode) {
+ GraphNode gn = new GraphNode(resource, tc);
Graph context = gn.getNodeContext();
msg.addAll(context);
tc.removeAll(context);
} else {
- if ((triple.getObject() instanceof BNode)
- && (triple.getSubject() != triple.getObject())) {
- containsBNode = true;
- GraphNode gn = new GraphNode(triple.getObject(), tc);
- Graph context = gn.getNodeContext();
- msg.addAll(context);
- tc.removeAll(context);
- } else {
- msg.add(triple);
- tc.remove(triple);
- }
+ msg.add(triple);
+ tc.remove(triple);
}
-
return containsBNode;
}
-
-
private List<String> labelBlankNodes(BufferedReader serializedGraph,
String prefix) throws IOException {
String line = null;
- List<String> lines = new Vector<String>();
+ List<String> lines = new LinkedList<String>();
long commentedIdentifiers = 0;
while ((line = serializedGraph.readLine()) != null) {
try {
commentedIdentifiers = commentBlankNodeLabels(line,
commentedIdentifiers, lines);
- } catch(IOException ex) {
+ } catch (IOException ex) {
logger.error("Exception while trying to parse line: "
+ line + "\n{}", ex);
}
@@ -404,22 +402,30 @@ public class StableSerializerProvider im
//hash is needed only for b-node labelling
continue;
}
- StringBuffer input = new StringBuffer();
+ List<String> tripleHashes = new ArrayList<String>(msg.tc.size());
for (Triple t : msg.tc) {
+ StringBuilder tripleHash = new StringBuilder();
if (!(t.getSubject() instanceof BNode)) {
- input.append(((UriRef) t.getSubject()).hashCode());
+ tripleHash.append(((UriRef) t.getSubject()).hashCode());
}
- input.append(t.getPredicate().hashCode());
+ tripleHash.append(t.getPredicate().hashCode());
if (!(t.getObject() instanceof BNode)) {
if (t.getObject() instanceof Literal) {
- input.append(((Literal) t.getObject()).
+ tripleHash.append(((Literal) t.getObject()).
toString().hashCode());
} else {
- input.append(((UriRef) t.getObject()).hashCode());
+ tripleHash.append(((UriRef) t.getObject()).hashCode());
}
}
+ tripleHashes.add(tripleHash.toString());
}
- md.update(input.toString().getBytes());
+ Collections.sort(tripleHashes);
+ StringBuilder msgHash = new StringBuilder();
+ for(String tripleHash : tripleHashes) {
+ msgHash.append(tripleHash);
+ }
+
+ md.update(msgHash.toString().getBytes());
String hexString;
if(computedHashes.add((hexString = getHashHexString(md.digest())))){
@@ -437,9 +443,8 @@ public class StableSerializerProvider im
}
}
-
private String getHashHexString(byte[] hash) {
- StringBuffer hexString = new StringBuffer();
+ StringBuilder hexString = new StringBuilder();
for (int i = 0; i < hash.length; i++) {
String hex = Integer.toHexString(0xFF & hash[i]);
if (hex.length() == 1) {
Modified: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/DocumentationStabilityTest.java
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/DocumentationStabilityTest.java?rev=1042574&r1=1042573&r2=1042574&view=diff
==============================================================================
--- incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/DocumentationStabilityTest.java (original)
+++ incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/DocumentationStabilityTest.java Mon Dec 6 11:01:51 2010
@@ -52,8 +52,7 @@ public class DocumentationStabilityTest
new UriRef("bundle://org.apache.clerezza.platform.documentation/intro")));
final Set<String> lines2 = serializeToLines(tc2);
lines2.removeAll(lines1);
- //TODO: the following fails, fix:
- //Assert.assertEquals(1, lines2.size());
+ Assert.assertEquals(1, lines2.size());
}
private Set<String> serializeToLines(TripleCollection tc) throws UnsupportedEncodingException {