You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2012/01/10 17:47:22 UTC

svn commit: r1229633 - in /incubator/jena/Jena2/ARQ/trunk/src: main/java/com/hp/hpl/jena/sparql/resultset/ main/java/org/openjena/atlas/io/ test/java/com/hp/hpl/jena/sparql/resultset/

Author: andy
Date: Tue Jan 10 16:47:22 2012
New Revision: 1229633

URL: http://svn.apache.org/viewvc?rev=1229633&view=rev
Log:
JENA-187 - Stream based parsing for TSVInput

Added:
    incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java
Modified:
    incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInput.java
    incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/io/IO.java
    incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat1.java
    incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat2.java

Modified: incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInput.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInput.java?rev=1229633&r1=1229632&r2=1229633&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInput.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInput.java Tue Jan 10 16:47:22 2012
@@ -26,72 +26,52 @@ import java.util.List ;
 import java.util.regex.Pattern ;
 
 import org.openjena.atlas.io.IO ;
-import org.openjena.riot.tokens.Tokenizer ;
-import org.openjena.riot.tokens.TokenizerFactory ;
 
-import com.hp.hpl.jena.graph.Node ;
 import com.hp.hpl.jena.query.ResultSet ;
 import com.hp.hpl.jena.sparql.ARQException ;
 import com.hp.hpl.jena.sparql.core.Var ;
 import com.hp.hpl.jena.sparql.engine.ResultSetStream ;
-import com.hp.hpl.jena.sparql.engine.binding.Binding ;
-import com.hp.hpl.jena.sparql.engine.binding.BindingFactory ;
-import com.hp.hpl.jena.sparql.engine.binding.BindingMap ;
-import com.hp.hpl.jena.sparql.engine.iterator.QueryIterPlainWrapper ;
 
 /**
  * Input reader associated to {@link TSVOutput}.
- * 
- * @author Laurent Pellegrino
  */
 public class TSVInput {
 
 	static Pattern pattern = Pattern.compile("\t");
 	
-    public static ResultSet fromTSV(InputStream in) {
+	/**
+	 * Reads SPARQL Results from TSV format into a {@link ResultSet} instance
+	 */
+    public static ResultSet fromTSV(InputStream in)
+    {
     	BufferedReader reader = IO.asBufferedUTF8(in);
         List<Var> vars = new ArrayList<Var>();
         List<String> varNames = new ArrayList<String>();
-        List<Binding> bindings = new ArrayList<Binding>();
 
         boolean first = true;
     	String str = null;
-    	int line = 0;
-        try {
-        	while ( ( str = reader.readLine() ) != null ) {
-        		line++;
-        		String[] tokens = pattern.split(str,-1);
-        		if ( first ) {
-        			for ( String token : tokens ) {
-                		if ( token.startsWith("?") ) 
-                			token = token.substring(1);
-                		Var var = Var.alloc(token);
-                		vars.add(var);
-                		varNames.add(var.getName());
-                	}
-                	first = false;
-        		} else {
-        			int num_tokens = tokens.length;
-        	        if ( num_tokens != vars.size() ) {
-        	        	 throw new ARQException(String.format("Line %d has %d values instead of %d.", line, num_tokens, vars.size()));
-        	        }
-        	        BindingMap binding = BindingFactory.create();
-        	        for ( int i = 0; i < tokens.length; i++ ) {
-        	        	String token = tokens[i];
-                		Tokenizer tokenizer = TokenizerFactory.makeTokenizerString(token);
-                		if ( tokenizer.hasNext() && token.length() > 0 ) {
-                			Node node = tokenizer.next().asNode();
-                			binding.add(vars.get(i), node);
-                		}
-                	}
-                	bindings.add(binding);
-        		}
+        try 
+        {
+        	//Here we try to parse only the Header Row - readLine() gives null for empty input, which has no header
+        	str = reader.readLine();
+        	if ( str == null ) throw new ARQException("TSV Results malformed, input is empty (no header row)") ;
+        	String[] tokens = pattern.split(str,-1);
+        	for ( String token : tokens ) {
+        		if (token.startsWith("?")) token = token.substring(1);
+        		Var var = Var.alloc(token);
+        		vars.add(var);
+        		varNames.add(var.getName());
         	}
-        } catch ( IOException ex ) {
+        } 
+        catch ( IOException ex )
+        {
         	throw new ARQException(ex) ;
         }
 
-        return new ResultSetStream(varNames, null, new QueryIterPlainWrapper(bindings.iterator()));
+        //Generate an instance of ResultSetStream using TSVInputIterator
+        //This will parse actual result rows as needed thus minimising memory usage
+        return new ResultSetStream(varNames, null, new TSVInputIterator(reader, vars));
     }
+ 
 
 }

Added: incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java?rev=1229633&view=auto
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java (added)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java Tue Jan 10 16:47:22 2012
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.hp.hpl.jena.sparql.resultset;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+import org.openjena.atlas.io.IO ;
+import org.openjena.atlas.io.IndentedWriter;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.query.QueryException;
+import com.hp.hpl.jena.sparql.core.Var;
+import com.hp.hpl.jena.sparql.engine.binding.Binding;
+import com.hp.hpl.jena.sparql.engine.binding.BindingFactory;
+import com.hp.hpl.jena.sparql.engine.binding.BindingMap;
+import com.hp.hpl.jena.sparql.engine.iterator.QueryIteratorBase;
+import com.hp.hpl.jena.sparql.serializer.SerializationContext;
+import com.hp.hpl.jena.sparql.util.NodeFactory;
+
+
+/**
+ * Class used to do streaming parsing of actual result rows from the TSV
+ */
+public class TSVInputIterator extends QueryIteratorBase
+{
+	private BufferedReader reader;
+	private BindingMap binding;
+	private int expectedItems;
+	private List<Var> vars;
+	
+	/**
+	 * Creates a new TSV Input Iterator over the Result Rows that follow the Header Row
+	 * <p>Assumes the Header Row has already been read and that the next row read from the reader will be a Result Row</p>
+	 * @param reader Reader to take the Result Rows from, it is closed when this iterator is closed
+	 * @param vars Variables for the columns, every Result Row must have exactly this many values
+	 */
+	public TSVInputIterator(BufferedReader reader, List<Var> vars)
+	{
+		this.reader = reader;
+		this.expectedItems = vars.size();
+		this.vars = vars;
+	}
+	
+	@Override
+	public void output(IndentedWriter out, SerializationContext sCxt) {
+	    // Not needed - only called as part of printing/debugging query plans.
+		out.println("TSVInputIterator") ;
+	}
+
+	/**
+	 * Reports whether another Result Row is available, reading one ahead from the stream when none is buffered
+	 */
+	@Override
+	protected boolean hasNextBinding() {
+		//Once closeIterator() has released the reader there can be no further rows
+		if (this.reader == null)
+			return false;
+		//A row parsed by an earlier call that has not been handed out yet still counts
+		if (this.binding != null)
+			return true;
+		//Nothing buffered so attempt to read and parse the next row
+		return this.parseNextBinding();
+	}
+	
+	/** Reads the next line and parses it into the buffered binding, returns false once the stream is exhausted */
+	private boolean parseNextBinding()
+	{
+		String line;
+		try 
+		{
+			line = this.reader.readLine();
+			//Once EOF has been reached we'll see null for this call so we can return false because there are no further bindings
+			if (line == null) return false;
+		} 
+		catch (IOException e) 
+		{
+			//Keep the IOException as the cause so the underlying stack trace is not lost
+			throw new QueryException("Error parsing TSV results - " + e.getMessage(), e);
+		}
+		//Limit of -1 keeps trailing empty strings so unbound values at the end of a row are still counted
+		String[] tokens = TSVInput.pattern.split(line, -1);
+		if (tokens.length != expectedItems)
+			throw new QueryException(String.format("Error Parsing TSV results - A result row had %d values instead of the expected %d.", tokens.length, expectedItems));
+		//Build the row locally so a failed parse never leaves a half-filled binding buffered for the next call
+		BindingMap row = BindingFactory.create();
+		for ( int i = 0; i < tokens.length; i++ ) 
+		{
+			//If we see an empty string this denotes an unbound value so nothing is added for that variable
+			if (tokens[i].equals("")) continue; 
+			Node node = NodeFactory.parseNode(tokens[i]);
+			row.add(this.vars.get(i), node);
+		}
+		this.binding = row;
+		return true;
+	}
+
+	@Override
+	protected Binding moveToNextBinding() {
+        if (!hasNext()) throw new NoSuchElementException() ;
+        Binding b = this.binding;
+        this.binding = null ;
+        return b;
+	}
+
+	@Override
+	protected void closeIterator() {
+	    IO.close(reader) ;
+	    reader = null;
+	}
+
+	@Override
+	protected void requestCancel() {
+		//Don't need to do anything special to cancel
+		//Superclass should take care of that and call closeIterator() where we do our actual clean up
+	}
+}

Modified: incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/io/IO.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/io/IO.java?rev=1229633&r1=1229632&r2=1229633&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/io/IO.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/io/IO.java Tue Jan 10 16:47:22 2012
@@ -136,19 +136,11 @@ public class IO
     
     public static void close(java.io.Closeable resource)
     {
+        if ( resource == null )
+            return ;
         try { resource.close(); } catch (IOException ex) { exception(ex) ; }
     }
     
-    public static void flish(OutputStream out)
-    {
-        try { out.flush(); } catch (IOException ex) { exception(ex) ; }
-    }
-    
-    public static void flish(Writer out)
-    {
-        try { out.flush(); } catch (IOException ex) { exception(ex) ; }
-    }
-
     public static void exception(IOException ex)
     {
         throw new AtlasException(ex) ;
@@ -160,10 +152,18 @@ public class IO
     }
     
     public static void flush(OutputStream out)
-    { try { out.flush(); } catch (IOException ex) { exception(ex) ; } }
+    { 
+        if ( out == null )
+            return ;
+        try { out.flush(); } catch (IOException ex) { exception(ex) ; }
+    }
     
     public static void flush(Writer out)
-    { try { out.flush(); } catch (IOException ex) { exception(ex) ; } }
+    {
+        if ( out == null )
+            return ;
+        try { out.flush(); } catch (IOException ex) { exception(ex) ; } 
+    }
 
     private static final int BUFFER_SIZE = 8*1024 ; 
     

Modified: incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat1.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat1.java?rev=1229633&r1=1229632&r2=1229633&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat1.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat1.java Tue Jan 10 16:47:22 2012
@@ -23,6 +23,7 @@ import java.io.ByteArrayOutputStream ;
 import java.util.Arrays ;
 import java.util.Collection ;
 
+import org.junit.Assert;
 import org.junit.Test ;
 import org.junit.runner.RunWith ;
 import org.junit.runners.Parameterized ;
@@ -32,6 +33,7 @@ import org.openjena.atlas.lib.StrUtils ;
 import com.hp.hpl.jena.query.ResultSet ;
 import com.hp.hpl.jena.query.ResultSetFactory ;
 import com.hp.hpl.jena.query.ResultSetFormatter ;
+import com.hp.hpl.jena.query.ResultSetRewindable;
 import com.hp.hpl.jena.sparql.sse.Item ;
 import com.hp.hpl.jena.sparql.sse.SSE ;
 import com.hp.hpl.jena.sparql.sse.builders.BuilderResultSet ;
@@ -79,7 +81,7 @@ public class TestResultSetFormat1
         
         String x = StrUtils.strjoinNL(strings) ;
         Item item = SSE.parse(x) ;
-        return BuilderResultSet.build(item) ;
+        return ResultSetFactory.makeRewindable(BuilderResultSet.build(item));
     }
     
     @Test public void resultset_01()           
@@ -95,6 +97,7 @@ public class TestResultSetFormat1
         ResultSetFormatter.outputAsXML(out, rs) ;
         ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()) ;
         ResultSet rs2 = ResultSetFactory.fromXML(in) ;
+        areIsomorphic(rs, rs2);
     }
 
     @Test public void resultset_03()           
@@ -104,6 +107,7 @@ public class TestResultSetFormat1
         ResultSetFormatter.outputAsJSON(out, rs) ;
         ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()) ;
         ResultSet rs2 = ResultSetFactory.fromJSON(in) ;
+        areIsomorphic(rs, rs2);
     }
     
     @Test public void resultset_04()           
@@ -113,6 +117,7 @@ public class TestResultSetFormat1
         ResultSetFormatter.outputAsTSV(out, rs) ;
         ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()) ;
         ResultSet rs2 = ResultSetFactory.fromTSV(in) ;
+        areIsomorphic(rs, rs2);
     }
 
     @Test public void resultset_05()           
@@ -121,4 +126,17 @@ public class TestResultSetFormat1
         ByteArrayOutputStream out = new ByteArrayOutputStream() ;
         ResultSetFormatter.outputAsCSV(out, rs) ;
     }
+    
+    /** Asserts that the two result sets are isomorphic according to {@link ResultSetCompare}. */
+    private static void areIsomorphic(ResultSet x, ResultSet y)
+    {
+        ResultSetRewindable rs1 = ResultSetFactory.makeRewindable(x) ;
+        ResultSetRewindable rs2 = ResultSetFactory.makeRewindable(y) ;
+        // Debugging aid (disabled): print both result sets, then rewind them before comparing.
+//        System.out.println(ResultSetFormatter.asText(rs1));
+//        System.out.println(ResultSetFormatter.asText(rs2));
+//        rs1.reset(); rs2.reset();
+        Assert.assertTrue(ResultSetCompare.isomorphic(rs1, rs2)) ;
+    }
+
 }

Modified: incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat2.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat2.java?rev=1229633&r1=1229632&r2=1229633&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat2.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat2.java Tue Jan 10 16:47:22 2012
@@ -18,18 +18,20 @@
 
 package com.hp.hpl.jena.sparql.resultset;
 
-import java.io.ByteArrayInputStream;
+import java.io.ByteArrayInputStream ;
 
-import org.junit.Test;
-import org.openjena.atlas.lib.StrUtils;
+import org.junit.Test ;
+import org.openjena.atlas.lib.StrUtils ;
 
-import com.hp.hpl.jena.query.ResultSet;
-import com.hp.hpl.jena.query.ResultSetFactory;
-import com.hp.hpl.jena.sparql.ARQException;
+import com.hp.hpl.jena.query.QueryException ;
+import com.hp.hpl.jena.query.ResultSet ;
+import com.hp.hpl.jena.query.ResultSetFactory ;
+import com.hp.hpl.jena.sparql.engine.binding.Binding ;
 
 public class TestResultSetFormat2
 {
-    @Test (expected=ARQException.class) public void resultset_10()
+    @Test (expected=QueryException.class) 
+    public void resultset_10()
     {
         // This is illegal
         // Two vars, row of 3 values.
@@ -37,9 +39,16 @@ public class TestResultSetFormat2
         byte[] b = StrUtils.asUTF8bytes(x) ;
         ByteArrayInputStream in = new ByteArrayInputStream(b) ;
         ResultSet rs2 = ResultSetFactory.fromTSV(in) ;
+        
+        while (rs2.hasNext())
+        {
+        	Binding binding = rs2.nextBinding();
+        	System.out.println(binding);
+        }
     }
 
-    @Test (expected=ARQException.class) public void resultset_11()
+    @Test (expected=QueryException.class) 
+    public void resultset_11()
     {
         // This is illegal
         // Two vars, row of 1 value only.
@@ -47,6 +56,12 @@ public class TestResultSetFormat2
         byte[] b = StrUtils.asUTF8bytes(x) ;
         ByteArrayInputStream in = new ByteArrayInputStream(b) ;
         ResultSet rs2 = ResultSetFactory.fromTSV(in) ;
+        
+        while (rs2.hasNext())
+        {
+        	Binding binding = rs2.nextBinding();
+        	System.out.println(binding);
+        }
     }    
     
 }