You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by bu...@apache.org on 2014/09/03 17:00:04 UTC

svn commit: r921145 - in /websites/staging/jena/trunk/content: ./ documentation/io/index.html documentation/io/streaming-io.html

Author: buildbot
Date: Wed Sep  3 15:00:04 2014
New Revision: 921145

Log:
Staging update by buildbot for jena

Added:
    websites/staging/jena/trunk/content/documentation/io/streaming-io.html
Modified:
    websites/staging/jena/trunk/content/   (props changed)
    websites/staging/jena/trunk/content/documentation/io/index.html

Propchange: websites/staging/jena/trunk/content/
------------------------------------------------------------------------------
--- cms:source-revision (original)
+++ cms:source-revision Wed Sep  3 15:00:04 2014
@@ -1 +1 @@
-1621941
+1622276

Modified: websites/staging/jena/trunk/content/documentation/io/index.html
==============================================================================
--- websites/staging/jena/trunk/content/documentation/io/index.html (original)
+++ websites/staging/jena/trunk/content/documentation/io/index.html Wed Sep  3 15:00:04 2014
@@ -151,6 +151,7 @@
 <li><a href="#command-line-tools">Commands</a></li>
 <li><a href="rdf-input.html">Reading RDF in Jena</a></li>
 <li><a href="rdf-output.html">Writing RDF in Jena</a></li>
+<li><a href="streaming-io.html">Working with RDF Streams</a></li>
 <li><a href="rdfxml_howto.html">Additional details on working with RDF/XML</a></li>
 </ul>
 <h2 id="formats">Formats</h2>

Added: websites/staging/jena/trunk/content/documentation/io/streaming-io.html
==============================================================================
--- websites/staging/jena/trunk/content/documentation/io/streaming-io.html (added)
+++ websites/staging/jena/trunk/content/documentation/io/streaming-io.html Wed Sep  3 15:00:04 2014
@@ -0,0 +1,294 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+
+  <title>Apache Jena - Working with RDF Streams in Apache Jena</title>
+  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+  <link href="/css/bootstrap.min.css" rel="stylesheet" media="screen">
+  <link href="/css/bootstrap-extension.css" rel="stylesheet" type="text/css">
+  <link href="/css/jena.css" rel="stylesheet" type="text/css">
+  <link rel="shortcut icon" href="/images/favicon.ico" />
+  
+  <script src="https://code.jquery.com/jquery-2.0.3.min.js"></script>
+  <script src="/js/jena-navigation.js" type="text/javascript"></script>
+  <script src="/js/bootstrap.min.js" type="text/javascript"></script>
+  <script src="/js/breadcrumbs.js" type="text/javascript"></script>
+
+  
+  <!-- Uncomment to enable code coloring <link href="/css/codehilite.css" rel="stylesheet" type="text/css"> -->
+
+</head>
+
+<body>
+
+
+
+<nav class="navbar navbar-default" role="navigation">
+<div class="container">
+  <div class="navbar-header">
+  
+        <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-ex1-collapse">
+      <span class="icon-bar"></span>
+      <span class="icon-bar"></span>
+      <span class="icon-bar"></span>
+    </button>
+ 	<a class="navbar-brand" href="/index.html">
+		<img class="logo-menu" src="/images/jena-logo/jena-logo-notext-small.png" alt="jena logo">Apache Jena</a>
+  </div>
+ 
+
+
+  <div class="collapse navbar-collapse navbar-ex1-collapse">
+    <ul class="nav navbar-nav">
+
+
+
+              <li id="homepage"><a href="/index.html"><span class="glyphicon glyphicon-home"></span> Home</a></li>
+              <li id="download"><a href="/download/index.cgi"><span class="glyphicon glyphicon-download-alt"></span> Download</a></li>
+              <li class="dropdown">
+                <a href="#" class="dropdown-toggle" data-toggle="dropdown"><span class="glyphicon glyphicon-book"></span> Learn <b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li class="dropdown-header">Tutorials</li>
+                  <li><a href="/tutorials/index.html">Overview</a></li>
+                  <li><a href="/tutorials/rdf_api.html">RDF core API tutorial</a></li>
+                  <li><a href="/tutorials/sparql.html">SPARQL tutorial</a></li>
+                  <li><a href="/documentation/query/manipulating_sparql_using_arq.html">Manipulating SPARQL using ARQ</a></li>
+                  <li><a href="/tutorials/using_jena_with_eclipse.html">Using Jena with Eclipse</a></li>
+                  <li><a href="/documentation/notes/index.html">How-To's</a></li>
+                  <li class="divider"></li>
+                  <li class="dropdown-header">References</li>
+                  <li><a href="/documentation/index.html">Overview</a></li>
+                  <li><a href="/documentation/javadoc/">Javadoc</a></li>
+                  <li><a href="/documentation/rdf/index.html">RDF API</a></li>
+                  <li><a href="/documentation/io/">RDF I/O</a></li>
+                  <li><a href="/documentation/query/index.html">ARQ (SPARQL)</a></li>
+                  <li><a href="/documentation/query/text-query.html">Text Search</a></li>
+                  <li><a href="/documentation/tdb/index.html">TDB</a></li>
+		  <li><a href="/documentation/sdb/index.html">SDB</a></li>
+		  <li><a href="/documentation/jdbc/index.html">SPARQL over JDBC</a></li>
+		  <li><a href="/documentation/security/index.html">Security</a></li>
+                  <li><a href="/documentation/serving_data/index.html">Fuseki</a></li>
+                  <li><a href="/documentation/assembler/index.html">Assembler</a></li>
+                  <li><a href="/documentation/ontology/">Ontology API</a></li>
+                  <li><a href="/documentation/inference/index.html">Inference API</a></li>
+                  <li><a href="/documentation/tools/index.html">Command-line tools</a></li>
+                </ul>
+              </li>
+
+              <li class="dropdown">
+                <a href="#" class="dropdown-toggle" data-toggle="dropdown"><span class="glyphicon glyphicon-book"></span> Javadoc <b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/documentation/javadoc/jena/">Jena Core</a></li>
+                  <li><a href="/documentation/javadoc/arq/">ARQ</a></li>
+                  <li><a href="/documentation/javadoc/tdb/">TDB</a></li>
+                  <li><a href="/documentation/javadoc/text/">Text Search</a></li>
+                  <li><a href="/documentation/javadoc/spatial/">Spatial Search</a></li>
+                  <li><a href="/documentation/javadoc/security/">Security</a></li>
+                  <li><a href="/documentation/javadoc/jdbc/">JDBC</a></li>
+                  <li><a href="/documentation/javadoc/sdb/">SDB</a></li>
+                  <li><a href="/documentation/javadoc/fuseki/">Fuseki</a></li>
+                </ul>
+              </li>
+
+              <li id="ask"><a href="/help_and_support/index.html"><span class="glyphicon glyphicon-question-sign"></span> Ask</a></li>
+              
+              <li class="dropdown">
+                <a href="#" class="dropdown-toggle" data-toggle="dropdown"><span class="glyphicon glyphicon-bullhorn"></span> Get involved <b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/getting_involved/index.html">Contribute</a></li>
+                  <li><a href="/help_and_support/bugs_and_suggestions.html">Report a bug</a></li>
+                  <li class="divider"></li>
+                  <li class="dropdown-header">Project</li>
+                  <li><a href="/about_jena/about.html">About Jena</a></li>
+                  <li><a href="/about_jena/roadmap.html">Roadmap</a></li>
+                  <li><a href="/about_jena/architecture.html">Architecture</a></li>
+                  <li><a href="/about_jena/team.html">Project team</a></li>
+                  <li><a href="/about_jena/contributions.html">Related projects</a></li>
+                  <li class="divider"></li>
+                  <li class="dropdown-header">ASF</li>
+                  <li><a href="http://www.apache.org/">Apache Software Foundation</a></li>
+                  <li><a href="http://www.apache.org/licenses/LICENSE-2.0">License</a></li>
+                  <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+                  <li><a href="http://www.apache.org/foundation/sponsorship.html">Become a Sponsor</a></li>
+                  <li><a href="http://www.apache.org/security/">Security</a></li>
+                </ul>
+              </li>
+
+   
+    </ul>
+  </div>
+</div>
+</nav>
+
+
+<div class="container">
+	<div class="row">
+	<div class="col-md-12">
+	<div id="breadcrumbs"></div>
+	<h1 class="title">Working with RDF Streams in Apache Jena</h1>
+  <p>Jena has operations useful in processing RDF in a streaming
+fashion. Streaming can be used for manipulating RDF at scale.  Jena
+provides high performance readers and writers for all standard RDF formats,
+and it can be extended with custom formats.</p>
+<p>The <a href="http://afs.github.io/rdf-thrift">RDF Thrift</a> format provides the highest
+input parsing performance.  N-Triples/N-Quads provide the highest
+input parsing performance using W3C Standards.</p>
+<p>Files ending in <code>.gz</code> are assumed to be gzip-compressed. Input and output
+to such files takes this into account, including looking for the other file
+extension.  <code>data.nt.gz</code> is parsed as a gzip-compressed N-Triples file.</p>
+<h2 id="streamrdf"><code>StreamRDF</code></h2>
+<p>The central abstraction is <code>StreamRDF</code> which is an interface for streamed
+RDF data.  It covers triples and quads, and also parser events such as
+prefix settings and base URI declarations.</p>
+<div class="codehilite">
+<pre><span class="n">public</span> <span class="k">interface</span> <span class="n">StreamRDF</span>
+<span class="p">{</span>
+    <span class="cm">/** Start parsing */</span>
+    <span class="n">public</span> <span class="k">void</span> <span class="n">start</span><span class="p">()</span> <span class="p">;</span>
+
+    <span class="cm">/** Triple emitted */</span>
+    <span class="n">public</span> <span class="k">void</span> <span class="n">triple</span><span class="p">(</span><span class="n">Triple</span> <span class="n">triple</span><span class="p">)</span> <span class="p">;</span>
+
+    <span class="cm">/** Quad emitted */</span>
+    <span class="n">public</span> <span class="k">void</span> <span class="n">quad</span><span class="p">(</span><span class="n">Quad</span> <span class="n">quad</span><span class="p">)</span> <span class="p">;</span>
+
+    <span class="cm">/** base declaration seen */</span>
+    <span class="n">public</span> <span class="k">void</span> <span class="n">base</span><span class="p">(</span><span class="n">String</span> <span class="n">base</span><span class="p">)</span> <span class="p">;</span>
+
+    <span class="cm">/** prefix declaration seen */</span>
+    <span class="n">public</span> <span class="k">void</span> <span class="n">prefix</span><span class="p">(</span><span class="n">String</span> <span class="n">prefix</span><span class="p">,</span> <span class="n">String</span> <span class="n">iri</span><span class="p">)</span> <span class="p">;</span>
+
+    <span class="cm">/** Finish parsing */</span>
+    <span class="n">public</span> <span class="k">void</span> <span class="n">finish</span><span class="p">()</span> <span class="p">;</span>
+<span class="p">}</span>
+</pre></div>
+
+
+<p>There are utilities to help:</p>
+<ul>
+<li><code>StreamRDFLib</code> -- create <code>StreamRDF</code> objects</li>
+<li><code>StreamOps</code> -- helpers for sending RDF data to <code>StreamRDF</code> objects</li>
+</ul>
+<h2 id="reading-data">Reading data</h2>
+<p>All parsers of RDF syntaxes provided by RIOT are streaming with the
+exception of JSON-LD.  A JSON object can have members in any order so the
+parser may need the whole top-level object in order to have the information
+needed for parsing.</p>
+<p>The <code>parse</code> functions of <code>RDFDataMgr</code> direct the output of the parser to a
+<code>StreamRDF</code>.  For example:</p>
+<div class="codehilite"><pre><span class="n">StreamRDF</span> <span class="n">destination</span> <span class="p">=</span> <span class="p">...</span> 
+<span class="n">RDFDataMgr</span><span class="p">.</span><span class="n">parse</span><span class="p">(</span><span class="n">destination</span><span class="p">,</span> &quot;<span class="n">http</span><span class="p">:</span><span class="o">//</span><span class="n">example</span><span class="o">/</span><span class="n">data</span><span class="p">.</span><span class="n">ttl</span>&quot;<span class="p">)</span> <span class="p">;</span>
+</pre></div>
+
+
+<p>reads the remote URL, with content negotiation, and sends the triples to the
+<code>destination</code>.</p>
+<h2 id="writing-data">Writing data</h2>
+<p>Not all RDF formats are suitable for writing as a stream.  Formats that
+provide pretty printing (for example the default <code>RDFFormat</code> for each of
+Turtle, TriG and RDF/XML) require analysis of the whole of a model in order
+to determine nestable structures of blank nodes and for using specific
+syntax for RDF lists.</p>
+<p>These languages can be used for streaming output but with an appearance
+that is necessarily "less pretty".
+See <a href="rdf-output.html#streamed-block-formats">"Streamed Block Formats"</a> 
+for details.</p>
+<p>The <code>StreamRDFWriter</code> class has functions that write graphs and datasets
+using a streaming writer and also provides for the creation of
+a <code>StreamRDF</code> backed by a stream-based writer:</p>
+<div class="codehilite"><pre><span class="n">StreamRDFWriter</span><span class="p">.</span><span class="n">write</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="n">model</span><span class="p">.</span><span class="n">getGraph</span><span class="p">(),</span> <span class="n">lang</span><span class="p">)</span> <span class="p">;</span>
+</pre></div>
+
+
+<p>which can be done as:</p>
+<div class="codehilite"><pre><span class="n">StreamRDF</span> <span class="n">writer</span> <span class="p">=</span> <span class="n">StreamRDFWriter</span><span class="p">.</span><span class="n">getWriterStream</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="n">lang</span><span class="p">)</span> <span class="p">;</span>
+<span class="n">StreamOps</span><span class="p">.</span><span class="n">graphToStream</span><span class="p">(</span><span class="n">writer</span><span class="p">,</span> <span class="n">model</span><span class="p">.</span><span class="n">getGraph</span><span class="p">())</span> <span class="p">;</span>
+</pre></div>
+
+
+<p>N-Triples and N-Quads are always written as a stream.</p>
+<table>
+<thead>
+<tr>
+<th>Lang</th>
+<th>RDFFormat</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>Lang.TURTLE</code></td>
+<td><code>RDFFormat.TURTLE_BLOCKS</code></td>
+</tr>
+<tr>
+<td></td>
+<td><code>RDFFormat.TURTLE_FLAT</code></td>
+</tr>
+<tr>
+<td><code>Lang.TRIG</code></td>
+<td><code>RDFFormat.TRIG_BLOCKS</code></td>
+</tr>
+<tr>
+<td></td>
+<td><code>RDFFormat.TRIG_FLAT</code></td>
+</tr>
+<tr>
+<td><code>Lang.NTRIPLES</code></td>
+<td><code>RDFFormat.NTRIPLES_UTF8</code></td>
+</tr>
+<tr>
+<td></td>
+<td><code>RDFFormat.NTRIPLES_ASCII</code></td>
+</tr>
+<tr>
+<td><code>Lang.NQUADS</code></td>
+<td><code>RDFFormat.NQUADS_UTF8</code></td>
+</tr>
+<tr>
+<td></td>
+<td><code>RDFFormat.NQUADS_ASCII</code></td>
+</tr>
+<tr>
+<td><code>Lang.RDFTHRIFT</code></td>
+<td><code>RDFFormat.RDF_THRIFT</code></td>
+</tr>
+</tbody>
+</table>
+  </div>
+</div>
+
+</div><!--/.container -->
+
+    <footer class="footer">
+      <div class="container">
+        <p>Copyright &copy; 2011&ndash;2014 The Apache Software Foundation, Licensed under
+        the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
+        </p>
+        <p>
+        Apache Jena, Jena, the Apache Jena project logo,
+        Apache and the Apache feather logos are trademarks of The Apache Software Foundation.
+        </p>
+      </div>
+  </footer>
+      
+
+</body>
+</html>