You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2013/11/02 14:18:44 UTC
svn commit: r1538185 - in /jena/trunk/jena-arq:
src-examples/arq/examples/riot/ src/main/java/org/apache/jena/riot/lang/
src/test/java/org/apache/jena/riot/lang/
Author: andy
Date: Sat Nov 2 13:18:44 2013
New Revision: 1538185
URL: http://svn.apache.org/r1538185
Log:
JENA-581 : Collector pattern for parsing.
Added:
jena/trunk/jena-arq/src-examples/arq/examples/riot/ExRIOT_7.java
jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamBase.java
jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamQuads.java
jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTriples.java
jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTuples.java
jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestCollectorStream.java
Modified:
jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TS_Lang.java
Added: jena/trunk/jena-arq/src-examples/arq/examples/riot/ExRIOT_7.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src-examples/arq/examples/riot/ExRIOT_7.java?rev=1538185&view=auto
==============================================================================
--- jena/trunk/jena-arq/src-examples/arq/examples/riot/ExRIOT_7.java (added)
+++ jena/trunk/jena-arq/src-examples/arq/examples/riot/ExRIOT_7.java Sat Nov 2 13:18:44 2013
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package arq.examples.riot;
+
+import org.apache.jena.riot.RDFDataMgr;
+import org.apache.jena.riot.lang.CollectorStreamBase;
+import org.apache.jena.riot.lang.CollectorStreamTriples;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * Example of using RIOT to parse an RDF file straight into a Collection.
+ *
+ * Suitable for single-threaded parsing, for use with small data or distributed
+ * computing frameworks (e.g. Hadoop) where the overhead of creating many threads
+ * is significant.
+ *
+ * @see CollectorStreamBase
+ */
+public class ExRIOT_7 {
+
+    /** Parses {@code data.ttl} and prints each collected triple on its own line. */
+    public static void main(String... argv) {
+        CollectorStreamTriples collector = new CollectorStreamTriples();
+
+        // Parse the file into the collector, then print what it gathered.
+        RDFDataMgr.parse(collector, "data.ttl");
+        for (Triple parsed : collector.getCollected()) {
+            System.out.println(parsed);
+        }
+    }
+
+}
Added: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamBase.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamBase.java?rev=1538185&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamBase.java (added)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamBase.java Sat Nov 2 13:18:44 2013
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.riot.lang;
+
+import java.util.Collection;
+
+import org.apache.jena.atlas.lib.Tuple;
+import org.apache.jena.riot.system.PrefixMap;
+import org.apache.jena.riot.system.PrefixMapFactory;
+import org.apache.jena.riot.system.StreamRDF;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * Skeleton {@link StreamRDF} sink for implementations that store the items
+ * of type {@code T} they receive in a {@link java.util.Collection}.
+ * <p>
+ * {@code start()}, {@code finish()} and the triple/quad/tuple callbacks do
+ * nothing here; a subclass overrides the ones it needs and exposes what it
+ * gathered via {@link #getCollected()}. Prefixes and the base IRI are kept.
+ * <p>
+ * Implementations are suitable for single-threaded parsing of small data, or
+ * for distributed frameworks (e.g. Hadoop) where creating threads is costly.
+ *
+ * @param <T> Type of the value stored in the collection
+ */
+public abstract class CollectorStreamBase<T> implements StreamRDF {
+    private String baseIri;
+    private final PrefixMap prefixes = PrefixMapFactory.createForInput();
+
+    @Override
+    public void start() {}
+
+    @Override
+    public void triple(Triple triple) {}
+
+    @Override
+    public void quad(Quad quad) {}
+
+    @Override
+    public void tuple(Tuple<Node> tuple) {}
+
+    @Override
+    public void finish() {}
+
+    @Override
+    public void base(String base) {
+        baseIri = base;
+    }
+
+    @Override
+    public void prefix(String prefix, String iri) {
+        prefixes.add(prefix, iri);
+    }
+
+    /** @return the value last passed to {@link #base(String)}, or {@code null} if none was */
+    public String getBaseIri() {
+        return baseIri;
+    }
+
+    /** @return the prefix map that {@link #prefix(String, String)} adds to */
+    public PrefixMap getPrefixes() {
+        return prefixes;
+    }
+
+    /** @return The collection of items received by this instance. */
+    public abstract Collection<T> getCollected();
+}
Added: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamQuads.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamQuads.java?rev=1538185&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamQuads.java (added)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamQuads.java Sat Nov 2 13:18:44 2013
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.riot.lang;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.jena.riot.system.StreamRDF;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * {@link CollectorStreamBase} that gathers every {@link Quad} it is sent
+ * into a list, in arrival order; {@link #start()} empties that list.
+ *
+ * @see CollectorStreamBase
+ */
+public class CollectorStreamQuads extends CollectorStreamBase<Quad> implements StreamRDF {
+    private final List<Quad> collected = new ArrayList<Quad>();
+
+    @Override
+    public void start() {
+        collected.clear(); // drop anything gathered before this start()
+    }
+
+    @Override
+    public void quad(Quad quad) {
+        collected.add(quad);
+    }
+
+    @Override
+    public Collection<Quad> getCollected() {
+        return collected; // the live backing list, not a copy
+    }
+}
Added: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTriples.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTriples.java?rev=1538185&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTriples.java (added)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTriples.java Sat Nov 2 13:18:44 2013
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.riot.lang;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.jena.riot.system.StreamRDF;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * {@link CollectorStreamBase} that gathers every {@link Triple} it is sent
+ * into a list, in arrival order; {@link #start()} empties that list.
+ *
+ * @see CollectorStreamBase
+ */
+public class CollectorStreamTriples extends CollectorStreamBase<Triple> implements StreamRDF {
+    private final List<Triple> collected = new ArrayList<Triple>();
+
+    @Override
+    public void start() {
+        collected.clear(); // drop anything gathered before this start()
+    }
+
+    @Override
+    public void triple(Triple triple) {
+        collected.add(triple);
+    }
+
+    @Override
+    public Collection<Triple> getCollected() {
+        return collected; // the live backing list, not a copy
+    }
+}
Added: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTuples.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTuples.java?rev=1538185&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTuples.java (added)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/CollectorStreamTuples.java Sat Nov 2 13:18:44 2013
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.riot.lang;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.jena.atlas.lib.Tuple;
+import org.apache.jena.riot.system.StreamRDF;
+
+import com.hp.hpl.jena.graph.Node;
+
+/**
+ * {@link CollectorStreamBase} that gathers every {@link Tuple} of nodes it is
+ * sent into a list, in arrival order; {@link #start()} empties that list.
+ *
+ * @see CollectorStreamBase
+ */
+public class CollectorStreamTuples extends CollectorStreamBase<Tuple<Node>> implements StreamRDF {
+    private final List<Tuple<Node>> collected = new ArrayList<Tuple<Node>>();
+
+    @Override
+    public void start() {
+        collected.clear(); // drop anything gathered before this start()
+    }
+
+    @Override
+    public void tuple(Tuple<Node> tuple) {
+        collected.add(tuple);
+    }
+
+    @Override
+    public Collection<Tuple<Node>> getCollected() {
+        return collected; // the live backing list, not a copy
+    }
+}
Modified: jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TS_Lang.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TS_Lang.java?rev=1538185&r1=1538184&r2=1538185&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TS_Lang.java (original)
+++ jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TS_Lang.java Sat Nov 2 13:18:44 2013
@@ -40,7 +40,7 @@ import org.junit.runners.Suite ;
, TestLangRdfJson.class
, TestParserFactory.class
, TestPipedRDFIterators.class
-
+ , TestCollectorStream.class
})
Added: jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestCollectorStream.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestCollectorStream.java?rev=1538185&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestCollectorStream.java (added)
+++ jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestCollectorStream.java Sat Nov 2 13:18:44 2013
@@ -0,0 +1,83 @@
+package org.apache.jena.riot.lang;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.jena.atlas.lib.Tuple;
+import org.apache.jena.riot.system.StreamRDF;
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.NodeFactory ;
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.sparql.core.Quad;
+import com.hp.hpl.jena.sparql.util.NodeFactoryExtra;
+
+public class TestCollectorStream {
+    /** Feeds {@code size} generated triples to {@code out}; returns them in send order. */
+    private List<Triple> writeTriples(StreamRDF out, int size) {
+        Node predicate = NodeFactory.createURI("http://predicate");
+        List<Triple> sent = new ArrayList<Triple>();
+        out.start();
+        for (int value = 1; value <= size; value++) {
+            Triple t = new Triple(NodeFactory.createAnon(), predicate, NodeFactoryExtra.intToNode(value));
+            out.triple(t);
+            sent.add(t);
+        }
+        out.finish();
+        return sent;
+    }
+
+    @Test
+    public void test_streamed_triples() {
+        CollectorStreamTriples collector = new CollectorStreamTriples();
+        List<Triple> sent = writeTriples(collector, 10);
+        Assert.assertEquals(sent, collector.getCollected());
+    }
+
+    /** Feeds {@code size} generated quads to {@code out}; returns them in send order. */
+    private List<Quad> writeQuads(StreamRDF out, int size) {
+        Node graph = NodeFactory.createURI("http://graph");
+        Node predicate = NodeFactory.createURI("http://predicate");
+        List<Quad> sent = new ArrayList<Quad>();
+        out.start();
+        for (int value = 1; value <= size; value++) {
+            Quad q = new Quad(graph, NodeFactory.createAnon(), predicate, NodeFactoryExtra.intToNode(value));
+            out.quad(q);
+            sent.add(q);
+        }
+        out.finish();
+        return sent;
+    }
+
+    @Test
+    public void test_streamed_quads() {
+        CollectorStreamQuads collector = new CollectorStreamQuads();
+        List<Quad> sent = writeQuads(collector, 10);
+        Assert.assertEquals(sent, collector.getCollected());
+    }
+
+    /** Feeds {@code size} generated 4-node tuples to {@code out}; returns them in send order. */
+    private List<Tuple<Node>> writeTuples(StreamRDF out, int size) {
+        Node graph = NodeFactory.createURI("http://graph");
+        Node predicate = NodeFactory.createURI("http://predicate");
+        List<Tuple<Node>> sent = new ArrayList<Tuple<Node>>();
+        out.start();
+        for (int value = 1; value <= size; value++) {
+            Tuple<Node> t = Tuple.createTuple(graph, NodeFactory.createAnon(), predicate,
+                    NodeFactoryExtra.intToNode(value));
+            out.tuple(t);
+            sent.add(t);
+        }
+        out.finish();
+        return sent;
+    }
+
+    @Test
+    public void test_streamed_tuples() {
+        CollectorStreamTuples collector = new CollectorStreamTuples();
+        List<Tuple<Node>> sent = writeTuples(collector, 10);
+        Assert.assertEquals(sent, collector.getCollected());
+    }
+}