You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@jena.apache.org by Paolo Castagna <ca...@googlemail.com> on 2011/06/16 09:29:45 UTC

[Fwd: svn commit: r1136300 - /incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java]

FYI

The JIRA at Apache is 'linked' with the Subversion commit comments.

If you add the JIRA issue key to your commit comment (for example: JENA-72)
it will appear in the Subversion Commits tab on JIRA.
For example, see the Subversion Commits tab here:
https://issues.apache.org/jira/browse/JENA-72

I find this a very useful way to communicate with others and
I find it useful to go back to an issue and see all the changes
made to close that issue.

However, it requires committers to write a comment and add the
JIRA issue to it somewhere.

Paolo


-------- Original Message --------
Subject: svn commit: r1136300 - 
/incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java
Date: Thu, 16 Jun 2011 06:58:10 -0000
From: castagna@apache.org
Reply-To: jena-dev@incubator.apache.org
To: jena-commits@incubator.apache.org

Author: castagna
Date: Thu Jun 16 06:58:10 2011
New Revision: 1136300

URL: http://svn.apache.org/viewvc?rev=1136300&view=rev
Log:
JENA-72 - This now uses StringTokenizer and 
TokenizerFactory.makeTokenizerString() to parse TSV files. All tests pass.

Modified:
 
incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java

Modified: 
incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java
URL: 
http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java?rev=1136300&r1=1136299&r2=1136300&view=diff
==============================================================================
--- 
incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java 
(original)
+++ 
incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java 
Thu Jun 16 06:58:10 2011
@@ -1,14 +1,19 @@
  package com.hp.hpl.jena.sparql.resultset;

+import java.io.BufferedReader;
+import java.io.IOException;
  import java.io.InputStream;
  import java.util.ArrayList;
  import java.util.List;
+import java.util.StringTokenizer;

-import org.openjena.riot.tokens.Token;
+import org.openjena.atlas.io.IO;
  import org.openjena.riot.tokens.Tokenizer;
  import org.openjena.riot.tokens.TokenizerFactory;

+import com.hp.hpl.jena.graph.Node;
  import com.hp.hpl.jena.query.ResultSet;
+import com.hp.hpl.jena.sparql.ARQException;
  import com.hp.hpl.jena.sparql.core.Var;
  import com.hp.hpl.jena.sparql.engine.ResultSetStream;
  import com.hp.hpl.jena.sparql.engine.binding.Binding;
@@ -23,49 +28,45 @@ import com.hp.hpl.jena.sparql.engine.ite
  public class TSVInput {

      public static ResultSet fromTSV(InputStream in) {
-        Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(in);
+    	BufferedReader reader = IO.asBufferedUTF8(in);
+        List<Var> vars = new ArrayList<Var>();
          List<String> varNames = new ArrayList<String>();
          List<Binding> bindings = new ArrayList<Binding>();
-
-        // reads the variables
-        List<Var> vars = new ArrayList<Var>();
-        Token token = null;
-        while (tokenizer.getLine() == 1)
-        {
-            token = tokenizer.next();
-
-            if (token.isWord())
-            {
-                vars.add(Var.alloc(token.asWord()));
-                varNames.add(vars.get(vars.size() - 1).getName());
-            }
-        }
-
-        Binding binding = BindingFactory.create();;
-        // the first token from the second line is already
-        // consumed, hence we have to apply a specific
-        // behavior to handle it
-        binding.add(vars.get(0), token.asNode());
-        for (byte i=1; i<vars.size(); i++)
-        {
-            binding.add(vars.get(i), tokenizer.next().asNode());
-        }
-        bindings.add(binding);
-
-        // reads the next lines
-        while (tokenizer.hasNext())
-        {
-            binding = BindingFactory.create();
-
-            // reads each node from a line
-            for (byte i=1; i<vars.size(); i++)
-            {
-                binding.add(vars.get(i), tokenizer.next().asNode());
-            }
-
-            bindings.add(binding);
+
+        boolean first = true;
+        try {
+        	String line = null;
+        	while ( ( line = reader.readLine() ) != null ) {
+            	StringTokenizer st = new StringTokenizer(line, "\t");
+        		if ( first ) {
+                	while ( st.hasMoreTokens() ) {
+                		String token = st.nextToken();
+                		if ( token.startsWith("?") )
+                			token = token.substring(1);
+                		Var var = Var.alloc(token);
+                		vars.add(var);
+                		varNames.add(var.getName());
+                	}
+                	first = false;
+        		} else {
+        			int i = 0;
+        	        Binding binding = BindingFactory.create();
+                	while ( st.hasMoreTokens() ) {
+                		String token = st.nextToken();
+                		Tokenizer tokenizer = 
TokenizerFactory.makeTokenizerString(token);
+                		if ( tokenizer.hasNext() ) {
+                			Node node = tokenizer.next().asNode();
+                			binding.add(vars.get(i), node);
+                			i++;
+                		}
+                	}
+                	bindings.add(binding);
+        		}
+        	}
+        } catch ( IOException ex ) {
+        	throw new ARQException(ex) ;
          }
-
+
          return new ResultSetStream(varNames, null, new 
QueryIterPlainWrapper(bindings.iterator()));
      }