You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2012/01/10 17:47:22 UTC
svn commit: r1229633 - in /incubator/jena/Jena2/ARQ/trunk/src:
main/java/com/hp/hpl/jena/sparql/resultset/ main/java/org/openjena/atlas/io/
test/java/com/hp/hpl/jena/sparql/resultset/
Author: andy
Date: Tue Jan 10 16:47:22 2012
New Revision: 1229633
URL: http://svn.apache.org/viewvc?rev=1229633&view=rev
Log:
JENA-187 - Stream based parsing for TSVInput
Added:
incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java
Modified:
incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInput.java
incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/io/IO.java
incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat1.java
incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat2.java
Modified: incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInput.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInput.java?rev=1229633&r1=1229632&r2=1229633&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInput.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInput.java Tue Jan 10 16:47:22 2012
@@ -26,72 +26,61 @@ import java.util.List ;
import java.util.regex.Pattern ;
import org.openjena.atlas.io.IO ;
-import org.openjena.riot.tokens.Tokenizer ;
-import org.openjena.riot.tokens.TokenizerFactory ;
-import com.hp.hpl.jena.graph.Node ;
import com.hp.hpl.jena.query.ResultSet ;
import com.hp.hpl.jena.sparql.ARQException ;
import com.hp.hpl.jena.sparql.core.Var ;
import com.hp.hpl.jena.sparql.engine.ResultSetStream ;
-import com.hp.hpl.jena.sparql.engine.binding.Binding ;
-import com.hp.hpl.jena.sparql.engine.binding.BindingFactory ;
-import com.hp.hpl.jena.sparql.engine.binding.BindingMap ;
-import com.hp.hpl.jena.sparql.engine.iterator.QueryIterPlainWrapper ;
/**
* Input reader associated to {@link TSVOutput}.
- *
- * @author Laurent Pellegrino
*/
public class TSVInput {
static Pattern pattern = Pattern.compile("\t");
- public static ResultSet fromTSV(InputStream in) {
+ /**
+ * Reads SPARQL Results from TSV format into a {@link ResultSet} instance.
+ * <p>
+ * Only the header row is parsed here; the result rows are parsed on demand by the
+ * {@link TSVInputIterator} that backs the returned result set.
+ * </p>
+ *
+ * @param in the TSV input, read through {@code IO.asBufferedUTF8}
+ * @return a result set over the variables named in the header row
+ * @throws ARQException if the input is empty or the header row cannot be read
+ */
+ public static ResultSet fromTSV(InputStream in)
+ {
BufferedReader reader = IO.asBufferedUTF8(in);
List<Var> vars = new ArrayList<Var>();
List<String> varNames = new ArrayList<String>();
- List<Binding> bindings = new ArrayList<Binding>();
- boolean first = true;
String str = null;
- int line = 0;
- try {
- while ( ( str = reader.readLine() ) != null ) {
- line++;
- String[] tokens = pattern.split(str,-1);
- if ( first ) {
- for ( String token : tokens ) {
- if ( token.startsWith("?") )
- token = token.substring(1);
- Var var = Var.alloc(token);
- vars.add(var);
- varNames.add(var.getName());
- }
- first = false;
- } else {
- int num_tokens = tokens.length;
- if ( num_tokens != vars.size() ) {
- throw new ARQException(String.format("Line %d has %d values instead of %d.", line, num_tokens, vars.size()));
- }
- BindingMap binding = BindingFactory.create();
- for ( int i = 0; i < tokens.length; i++ ) {
- String token = tokens[i];
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerString(token);
- if ( tokenizer.hasNext() && token.length() > 0 ) {
- Node node = tokenizer.next().asNode();
- binding.add(vars.get(i), node);
- }
- }
- bindings.add(binding);
- }
+ try
+ {
+ //Here we try to parse only the Header Row
+ str = reader.readLine();
+ //readLine() returns null when the input is empty; report that clearly instead of failing with a NullPointerException in split()
+ if (str == null) throw new ARQException("TSV Results malformed, input is empty (no header row)");
+ String[] tokens = pattern.split(str,-1);
+ for ( String token : tokens )
+ {
+ if (token.startsWith("?")) token = token.substring(1);
+ Var var = Var.alloc(token);
+ vars.add(var);
+ varNames.add(var.getName());
}
- } catch ( IOException ex ) {
+ }
+ catch ( IOException ex )
+ {
throw new ARQException(ex) ;
}
- return new ResultSetStream(varNames, null, new QueryIterPlainWrapper(bindings.iterator()));
+ //Generate an instance of ResultSetStream using TSVInputIterator
+ //This will parse actual result rows as needed thus minimising memory usage
+ return new ResultSetStream(varNames, null, new TSVInputIterator(reader, vars));
}
+
}
Added: incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java?rev=1229633&view=auto
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java (added)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java Tue Jan 10 16:47:22 2012
@@ -0,0 +1,161 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.hp.hpl.jena.sparql.resultset;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+import org.openjena.atlas.io.IO ;
+import org.openjena.atlas.io.IndentedWriter;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.query.QueryException;
+import com.hp.hpl.jena.sparql.core.Var;
+import com.hp.hpl.jena.sparql.engine.binding.Binding;
+import com.hp.hpl.jena.sparql.engine.binding.BindingFactory;
+import com.hp.hpl.jena.sparql.engine.binding.BindingMap;
+import com.hp.hpl.jena.sparql.engine.iterator.QueryIteratorBase;
+import com.hp.hpl.jena.sparql.serializer.SerializationContext;
+import com.hp.hpl.jena.sparql.util.NodeFactory;
+
+
+/**
+ * Streaming parser for the result rows of SPARQL results in TSV format.
+ * <p>
+ * Rows are read from the underlying reader one at a time as the iterator is advanced,
+ * rather than being loaded up front.
+ * </p>
+ */
+public class TSVInputIterator extends QueryIteratorBase
+{
+    private BufferedReader reader;
+    private BindingMap binding;
+    private final int expectedItems;
+    private final List<Var> vars;
+    //Number of the last line read; starts at 1 because the Header Row is assumed to have been read already
+    private long lineNum = 1;
+
+    /**
+     * Creates a new TSV Input Iterator
+     * <p>
+     * Assumes the Header Row has already been read and that the next row to be read from the reader will be a Result Row
+     * </p>
+     *
+     * @param reader reader to take the result rows from; it is closed when this iterator is closed
+     * @param vars variables for the columns, in column order; each row must have exactly this many values
+     */
+    public TSVInputIterator(BufferedReader reader, List<Var> vars)
+    {
+        this.reader = reader;
+        this.expectedItems = vars.size();
+        this.vars = vars;
+    }
+
+    @Override
+    public void output(IndentedWriter out, SerializationContext sCxt) {
+        // Not needed - only called as part of printing/debugging query plans.
+        out.println("TSVInputIterator") ;
+    }
+
+    /**
+     * Reports whether another binding is available, parsing the next row if none is buffered.
+     *
+     * @return false once the reader has been released or the input is exhausted, otherwise true
+     */
+    @Override
+    protected boolean hasNextBinding() {
+        if (this.reader == null) return false;
+        if (this.binding != null) return true;
+        return this.parseNextBinding();
+    }
+
+    /**
+     * Reads the next line and, if there is one, parses it into the buffered binding.
+     *
+     * @return true if a row was parsed, false at end of input
+     * @throws QueryException if the line cannot be read or has the wrong number of values
+     */
+    private boolean parseNextBinding()
+    {
+        String line;
+        try
+        {
+            line = this.reader.readLine();
+        }
+        catch (IOException e)
+        {
+            //Pass the IOException on as the cause so the underlying I/O failure is not lost
+            throw new QueryException("Error parsing TSV results - " + e.getMessage(), e);
+        }
+        //Once EOF has been reached we'll see null for this call so we can return false because there are no further bindings
+        if (line == null) return false;
+        this.lineNum++;
+
+        String[] tokens = TSVInput.pattern.split(line, -1);
+
+        if (tokens.length != expectedItems)
+            throw new QueryException(String.format("Error Parsing TSV results - Line %d has %d values instead of the expected %d.", this.lineNum, tokens.length, expectedItems));
+
+        //Fill a local binding and only publish it once the whole row has parsed, so a failure part way through never leaves a half-filled row buffered
+        BindingMap row = BindingFactory.create();
+        for ( int i = 0; i < tokens.length; i++ )
+        {
+            String token = tokens[i];
+
+            //If we see an empty string this denotes an unbound value
+            if (token.equals("")) continue;
+
+            //Bound value so parse it and add to the binding
+            Node node = NodeFactory.parseNode(token);
+            row.add(this.vars.get(i), node);
+        }
+        this.binding = row;
+        return true;
+    }
+
+    /**
+     * Returns the buffered binding and clears the buffer so the next call parses a fresh row.
+     *
+     * @throws NoSuchElementException if there are no further bindings
+     */
+    @Override
+    protected Binding moveToNextBinding() {
+        if (!hasNext()) throw new NoSuchElementException() ;
+        Binding b = this.binding;
+        this.binding = null ;
+        return b;
+    }
+
+    /**
+     * Closes the underlying reader and drops the reference to it.
+     */
+    @Override
+    protected void closeIterator() {
+        IO.close(reader) ;
+        reader = null;
+    }
+
+    @Override
+    protected void requestCancel() {
+        //Don't need to do anything special to cancel
+        //Superclass should take care of that and call closeIterator() where we do our actual clean up
+    }
+}
Modified: incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/io/IO.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/io/IO.java?rev=1229633&r1=1229632&r2=1229633&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/io/IO.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/io/IO.java Tue Jan 10 16:47:22 2012
@@ -136,19 +136,11 @@ public class IO
public static void close(java.io.Closeable resource)
{
+ if ( resource == null ) // Null is tolerated: there is nothing to close
+ return ;
try { resource.close(); } catch (IOException ex) { exception(ex) ; }
}
- public static void flish(OutputStream out)
- {
- try { out.flush(); } catch (IOException ex) { exception(ex) ; }
- }
-
- public static void flish(Writer out)
- {
- try { out.flush(); } catch (IOException ex) { exception(ex) ; }
- }
-
public static void exception(IOException ex)
{
throw new AtlasException(ex) ;
@@ -160,10 +152,18 @@ public class IO
}
public static void flush(OutputStream out)
- { try { out.flush(); } catch (IOException ex) { exception(ex) ; } }
+ {
+ if ( out == null ) // Null is tolerated: there is nothing to flush
+ return ;
+ try { out.flush(); } catch (IOException ex) { exception(ex) ; } // exception(ex) rethrows it wrapped in an AtlasException
+ }
public static void flush(Writer out)
- { try { out.flush(); } catch (IOException ex) { exception(ex) ; } }
+ {
+ if ( out == null ) // Null is tolerated: there is nothing to flush
+ return ;
+ try { out.flush(); } catch (IOException ex) { exception(ex) ; } // exception(ex) rethrows it wrapped in an AtlasException
+ }
private static final int BUFFER_SIZE = 8*1024 ;
Modified: incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat1.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat1.java?rev=1229633&r1=1229632&r2=1229633&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat1.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat1.java Tue Jan 10 16:47:22 2012
@@ -23,6 +23,7 @@ import java.io.ByteArrayOutputStream ;
import java.util.Arrays ;
import java.util.Collection ;
+import org.junit.Assert;
import org.junit.Test ;
import org.junit.runner.RunWith ;
import org.junit.runners.Parameterized ;
@@ -32,6 +33,7 @@ import org.openjena.atlas.lib.StrUtils ;
import com.hp.hpl.jena.query.ResultSet ;
import com.hp.hpl.jena.query.ResultSetFactory ;
import com.hp.hpl.jena.query.ResultSetFormatter ;
+import com.hp.hpl.jena.query.ResultSetRewindable;
import com.hp.hpl.jena.sparql.sse.Item ;
import com.hp.hpl.jena.sparql.sse.SSE ;
import com.hp.hpl.jena.sparql.sse.builders.BuilderResultSet ;
@@ -79,7 +81,7 @@ public class TestResultSetFormat1
String x = StrUtils.strjoinNL(strings) ;
Item item = SSE.parse(x) ;
- return BuilderResultSet.build(item) ;
+ return ResultSetFactory.makeRewindable(BuilderResultSet.build(item));
}
@Test public void resultset_01()
@@ -95,6 +97,7 @@ public class TestResultSetFormat1
ResultSetFormatter.outputAsXML(out, rs) ;
ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()) ;
ResultSet rs2 = ResultSetFactory.fromXML(in) ;
+ areIsomorphic(rs, rs2);
}
@Test public void resultset_03()
@@ -104,6 +107,7 @@ public class TestResultSetFormat1
ResultSetFormatter.outputAsJSON(out, rs) ;
ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()) ;
ResultSet rs2 = ResultSetFactory.fromJSON(in) ;
+ areIsomorphic(rs, rs2);
}
@Test public void resultset_04()
@@ -113,6 +117,7 @@ public class TestResultSetFormat1
ResultSetFormatter.outputAsTSV(out, rs) ;
ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()) ;
ResultSet rs2 = ResultSetFactory.fromTSV(in) ;
+ areIsomorphic(rs, rs2);
}
@Test public void resultset_05()
@@ -121,4 +126,17 @@ public class TestResultSetFormat1
ByteArrayOutputStream out = new ByteArrayOutputStream() ;
ResultSetFormatter.outputAsCSV(out, rs) ;
}
+
+ /**
+ * Asserts that the two result sets are isomorphic according to {@code ResultSetCompare.isomorphic}.
+ * Both arguments are first passed through {@code ResultSetFactory.makeRewindable}.
+ */
+ private static void areIsomorphic(ResultSet x, ResultSet y)
+ {
+ ResultSetRewindable rs1 = ResultSetFactory.makeRewindable(x) ;
+ ResultSetRewindable rs2 = ResultSetFactory.makeRewindable(y) ;
+ // NOTE(review): callers pass x after writing it out with ResultSetFormatter; presumably makeRewindable hands back a view positioned at the first row - confirm
+ Assert.assertTrue(ResultSetCompare.isomorphic(rs1, rs2)) ;
+ }
+
}
Modified: incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat2.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat2.java?rev=1229633&r1=1229632&r2=1229633&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat2.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat2.java Tue Jan 10 16:47:22 2012
@@ -18,18 +18,20 @@
package com.hp.hpl.jena.sparql.resultset;
-import java.io.ByteArrayInputStream;
+import java.io.ByteArrayInputStream ;
-import org.junit.Test;
-import org.openjena.atlas.lib.StrUtils;
+import org.junit.Test ;
+import org.openjena.atlas.lib.StrUtils ;
-import com.hp.hpl.jena.query.ResultSet;
-import com.hp.hpl.jena.query.ResultSetFactory;
-import com.hp.hpl.jena.sparql.ARQException;
+import com.hp.hpl.jena.query.QueryException ;
+import com.hp.hpl.jena.query.ResultSet ;
+import com.hp.hpl.jena.query.ResultSetFactory ;
+import com.hp.hpl.jena.sparql.engine.binding.Binding ;
public class TestResultSetFormat2
{
- @Test (expected=ARQException.class) public void resultset_10()
+ @Test (expected=QueryException.class)
+ public void resultset_10()
{
// This is illegal
// Two vars, row of 3 values.
@@ -37,9 +39,16 @@ public class TestResultSetFormat2
byte[] b = StrUtils.asUTF8bytes(x) ;
ByteArrayInputStream in = new ByteArrayInputStream(b) ;
ResultSet rs2 = ResultSetFactory.fromTSV(in) ;
+
+ while (rs2.hasNext())
+ {
+ Binding binding = rs2.nextBinding();
+ System.out.println(binding);
+ }
}
- @Test (expected=ARQException.class) public void resultset_11()
+ @Test (expected=QueryException.class)
+ public void resultset_11()
{
// This is illegal
// Two vars, row of 1 value only.
@@ -47,6 +56,12 @@ public class TestResultSetFormat2
byte[] b = StrUtils.asUTF8bytes(x) ;
ByteArrayInputStream in = new ByteArrayInputStream(b) ;
ResultSet rs2 = ResultSetFactory.fromTSV(in) ;
+
+ while (rs2.hasNext())
+ {
+ Binding binding = rs2.nextBinding();
+ System.out.println(binding);
+ }
}
}