You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2012/07/03 01:33:09 UTC

svn commit: r1356519 - /jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java

Author: rvesse
Date: Mon Jul  2 23:33:08 2012
New Revision: 1356519

URL: http://svn.apache.org/viewvc?rev=1356519&view=rev
Log:
Major performance boost to SPARQL TSV parser (25x performance improvement seen on a 3.5 million result file - 6s down from 150s)

This is achieved by having the parser avoid calling NodeFactory.parseNode() wherever possible: it sniffs the first character or two of each term to see whether the term looks like a URI or a blank node and, if so, assumes that it is one and generates the Node directly.  NodeFactory.parseNode() is now only called for literals, because literals are complicated and we want all the escape handling that the full parser provides.

Modified:
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java?rev=1356519&r1=1356518&r2=1356519&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java Mon Jul  2 23:33:08 2012
@@ -28,6 +28,7 @@ import org.openjena.atlas.io.IndentedWri
 
 import com.hp.hpl.jena.graph.Node ;
 import com.hp.hpl.jena.query.QueryException ;
+import com.hp.hpl.jena.rdf.model.AnonId;
 import com.hp.hpl.jena.sparql.core.Var ;
 import com.hp.hpl.jena.sparql.engine.binding.Binding ;
 import com.hp.hpl.jena.sparql.engine.binding.BindingFactory ;
@@ -121,9 +122,9 @@ public class TSVInputIterator extends Qu
 	        	//If we see an empty string this denotes an unbound value
 	        	if (token.equals("")) continue; 
 	
-	        		//Bound value so parse it and add to the binding
-	        		Node node = NodeFactory.parseNode(token, null);
-	        		this.binding.add(this.vars.get(i), node);
+        		//Bound value so parse it and add to the binding
+        		Node node = parseNode(token);
+        		this.binding.add(this.vars.get(i), node);
 	        }
     	} catch (Exception e) {
     		throw new QueryException(String.format("Error Parsing TSV results at Line %d - The result row '%s' contains an invalid encoding of a Node", this.lineNum, line));
@@ -131,6 +132,16 @@ public class TSVInputIterator extends Qu
 
         return true;
 	}
+	
+	private Node parseNode(String token) { // Sniff the term's shape and build the Node directly where that is safe
+		if (token.startsWith("_:")) { // Blank node: the label is everything after the "_:" prefix
+			return Node.createAnon(new AnonId(token.substring(2)));
+		} else if (token.startsWith("<") && token.endsWith(">")) { // Only a properly closed <IRI> may take the fast path
+			return Node.createURI(token.substring(1, token.length()-1));
+		} else { // Everything else, including an unterminated "<...", goes to the full parser instead of being silently truncated
+			return NodeFactory.parseNode(token, null);
+		}
+	}
 
 	@Override
 	protected Binding moveToNextBinding() {