You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2013/11/02 14:18:44 UTC

svn commit: r1538185 - in /jena/trunk/jena-arq: src-examples/arq/examples/riot/ src/main/java/org/apache/jena/riot/lang/ src/test/java/org/apache/jena/riot/lang/

Author: andy
Date: Sat Nov  2 13:18:44 2013
New Revision: 1538185

URL: http://svn.apache.org/r1538185
Log:
JENA-581 : Collector pattern for parsing.

Added:
    jena/trunk/jena-arq/src-examples/arq/examples/riot/ExRIOT_7.java
    jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamBase.java
    jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamQuads.java
    jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTriples.java
    jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTuples.java
    jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestCollectorStream.java
Modified:
    jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TS_Lang.java

Added: jena/trunk/jena-arq/src-examples/arq/examples/riot/ExRIOT_7.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src-examples/arq/examples/riot/ExRIOT_7.java?rev=1538185&view=auto
==============================================================================
--- jena/trunk/jena-arq/src-examples/arq/examples/riot/ExRIOT_7.java (added)
+++ jena/trunk/jena-arq/src-examples/arq/examples/riot/ExRIOT_7.java Sat Nov  2 13:18:44 2013
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package arq.examples.riot;
+
+import org.apache.jena.riot.RDFDataMgr;
+import org.apache.jena.riot.lang.CollectorStreamBase;
+import org.apache.jena.riot.lang.CollectorStreamTriples;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * Example of using RIOT for streaming RDF to be stored into a Collection.
+ * 
+ * Suitable for single-threaded parsing, for use with small data or distributed 
+ * computing frameworks (e.g. Hadoop) where the overhead of creating many threads
+ * is significant. 
+ * 
+ * @see CollectorStreamBase
+ */
+public class ExRIOT_7 {
+
+    public static void main(String... argv) {
+        final String filename = "data.ttl";
+        
+        CollectorStreamTriples inputStream = new CollectorStreamTriples();
+        RDFDataMgr.parse(inputStream, filename);
+
+        for (Triple triple : inputStream.getCollected()) {
+        	System.out.println(triple);
+        }
+    }
+
+}

Added: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamBase.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamBase.java?rev=1538185&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamBase.java (added)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamBase.java Sat Nov  2 13:18:44 2013
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.riot.lang;
+
+import java.util.Collection;
+
+import org.apache.jena.atlas.lib.Tuple;
+import org.apache.jena.riot.system.PrefixMap;
+import org.apache.jena.riot.system.PrefixMapFactory;
+import org.apache.jena.riot.system.StreamRDF;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * Base class for StreamRDF implementations which store received {@code <T>}
+ * objects in a {@link java.util.Collection}. 
+ * 
+ * The resulting collection can be retrieved via the {@link #getCollected()}
+ * method.
+ * 
+ * Implementations are suitable for single-threaded parsing, for use with small
+ * data or distributed computing frameworks (e.g. Hadoop) where the overhead
+ * of creating many threads is significant.
+ *
+ * @param <T> Type of the value stored in the collection
+ */
+public abstract class CollectorStreamBase<T> implements StreamRDF {
+	private final PrefixMap prefixes = PrefixMapFactory.createForInput();
+	private String baseIri;
+	
+	@Override
+	public void finish() {}
+	
+	@Override
+	public void triple(Triple triple) {}
+
+	@Override
+	public void tuple(Tuple<Node> tuple) {}
+	
+	@Override
+	public void quad(Quad quad) {}
+
+	@Override
+	public void start() {}
+	
+	@Override
+	public void base(String base) {
+		this.baseIri = base;
+	}
+	
+	@Override
+	public void prefix(String prefix, String iri) {
+		prefixes.add(prefix, iri);
+	}
+	
+	public PrefixMap getPrefixes() {
+		return prefixes;
+	}
+
+	public String getBaseIri() {
+		return baseIri;
+	}
+
+	/**
+	 * @return The collection of items received by this instance.
+	 */
+	public abstract Collection<T> getCollected();
+}

Added: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamQuads.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamQuads.java?rev=1538185&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamQuads.java (added)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamQuads.java Sat Nov  2 13:18:44 2013
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.riot.lang;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.jena.riot.system.StreamRDF;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * Collector stream for quads.
+ * 
+ * @see CollectorStreamBase
+ */
+public class CollectorStreamQuads extends CollectorStreamBase<Quad>	implements StreamRDF {
+	private List<Quad> quads = new ArrayList<Quad>();
+
+	@Override
+	public void start() {
+		quads.clear();
+	}
+
+	@Override
+	public void quad(Quad quad) {
+		quads.add(quad);
+	}
+
+	@Override
+	public Collection<Quad> getCollected() {
+		return quads;
+	}
+}

Added: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTriples.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTriples.java?rev=1538185&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTriples.java (added)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTriples.java Sat Nov  2 13:18:44 2013
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.riot.lang;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.jena.riot.system.StreamRDF;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * Collector stream for triples.
+ * 
+ * @see CollectorStreamBase
+ */
+public class CollectorStreamTriples extends CollectorStreamBase<Triple> implements StreamRDF {
+	private List<Triple> triples = new ArrayList<Triple>();
+
+	@Override
+	public void start() {
+		triples.clear();
+	}
+
+	@Override
+	public void triple(Triple triple) {
+		triples.add(triple);
+	}
+
+	@Override
+	public Collection<Triple> getCollected() {
+		return triples;
+	}
+}

Added: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTuples.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTuples.java?rev=1538185&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTuples.java (added)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTuples.java Sat Nov  2 13:18:44 2013
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.riot.lang;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.jena.atlas.lib.Tuple;
+import org.apache.jena.riot.system.StreamRDF;
+
+import com.hp.hpl.jena.graph.Node;
+
+/**
+ * Collector stream for tuples.
+ * 
+ * @see CollectorStreamBase
+ */
+public class CollectorStreamTuples extends CollectorStreamBase<Tuple<Node>>	implements StreamRDF {
+	private List<Tuple<Node>> tuples = new ArrayList<Tuple<Node>>();
+
+	@Override
+	public void start() {
+		tuples.clear();
+	}
+
+	@Override
+	public void tuple(Tuple<Node> tuple) {
+		tuples.add(tuple);
+	}
+
+	@Override
+	public Collection<Tuple<Node>> getCollected() {
+		return tuples;
+	}
+}

Modified: jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TS_Lang.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TS_Lang.java?rev=1538185&r1=1538184&r2=1538185&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TS_Lang.java (original)
+++ jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TS_Lang.java Sat Nov  2 13:18:44 2013
@@ -40,7 +40,7 @@ import org.junit.runners.Suite ;
     , TestLangRdfJson.class
     , TestParserFactory.class
     , TestPipedRDFIterators.class
-    
+    , TestCollectorStream.class
 })
 
 

Added: jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestCollectorStream.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestCollectorStream.java?rev=1538185&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestCollectorStream.java (added)
+++ jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestCollectorStream.java Sat Nov  2 13:18:44 2013
@@ -0,0 +1,83 @@
+package org.apache.jena.riot.lang;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.jena.atlas.lib.Tuple;
+import org.apache.jena.riot.system.StreamRDF;
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.NodeFactory ;
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.sparql.core.Quad;
+import com.hp.hpl.jena.sparql.util.NodeFactoryExtra;
+
+public class TestCollectorStream  {
+	
+	private List<Triple> writeTriples(StreamRDF out, int size) {
+		List<Triple> results = new ArrayList<Triple>();
+		out.start();
+        for (int i = 1; i <= size; i++) {
+            Triple t = new Triple(NodeFactory.createAnon(),
+                    NodeFactory.createURI("http://predicate"), NodeFactoryExtra.intToNode(i));
+            out.triple(t);
+            results.add(t);
+        }
+        out.finish();
+        return results;
+	}
+	
+	@Test
+	public void test_streamed_triples() {
+		CollectorStreamTriples out = new CollectorStreamTriples();
+		List<Triple> expected = writeTriples(out, 10);
+		
+		Assert.assertEquals(expected, out.getCollected());
+	}
+	
+	private List<Quad> writeQuads(StreamRDF out, int size) {
+		List<Quad> results = new ArrayList<Quad>();
+		out.start();
+        for (int i = 1; i <= size; i++) {
+        	Quad q = new Quad(NodeFactory.createURI("http://graph"),
+                    NodeFactory.createAnon(),
+                    NodeFactory.createURI("http://predicate"), NodeFactoryExtra.intToNode(i));
+            out.quad(q);
+            results.add(q);
+        }
+        out.finish();
+        return results;
+	}
+	
+	@Test
+	public void test_streamed_quads() {
+		CollectorStreamQuads out = new CollectorStreamQuads();
+		List<Quad> expected = writeQuads(out, 10);
+		
+		Assert.assertEquals(expected, out.getCollected());
+	}
+	
+	private List<Tuple<Node>> writeTuples(StreamRDF out, int size) {
+		List<Tuple<Node>> results = new ArrayList<Tuple<Node>>();
+		out.start();
+        for (int i = 1; i <= size; i++) {
+            Tuple<Node> t = Tuple.createTuple(NodeFactory.createURI("http://graph"),
+                    NodeFactory.createAnon(),
+                    NodeFactory.createURI("http://predicate"), NodeFactoryExtra.intToNode(i));
+            out.tuple(t);
+            results.add(t);
+        }
+        out.finish();
+        return results;
+	}
+	
+	@Test
+	public void test_streamed_tuples() {
+		CollectorStreamTuples out = new CollectorStreamTuples();
+		List<Tuple<Node>> expected = writeTuples(out, 10);
+		
+		Assert.assertEquals(expected, out.getCollected());
+	}
+}