You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by ca...@apache.org on 2011/06/16 08:58:10 UTC
svn commit: r1136300 -
/incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java
Author: castagna
Date: Thu Jun 16 06:58:10 2011
New Revision: 1136300
URL: http://svn.apache.org/viewvc?rev=1136300&view=rev
Log:
JENA-72 - This now uses StringTokenizer and TokenizerFactory.makeTokenizerString() to parse TSV files. All tests pass.
Modified:
incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java
Modified: incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java?rev=1136300&r1=1136299&r2=1136300&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java Thu Jun 16 06:58:10 2011
@@ -1,14 +1,19 @@
package com.hp.hpl.jena.sparql.resultset;
+import java.io.BufferedReader;
+import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
+import java.util.StringTokenizer;
-import org.openjena.riot.tokens.Token;
+import org.openjena.atlas.io.IO;
import org.openjena.riot.tokens.Tokenizer;
import org.openjena.riot.tokens.TokenizerFactory;
+import com.hp.hpl.jena.graph.Node;
import com.hp.hpl.jena.query.ResultSet;
+import com.hp.hpl.jena.sparql.ARQException;
import com.hp.hpl.jena.sparql.core.Var;
import com.hp.hpl.jena.sparql.engine.ResultSetStream;
import com.hp.hpl.jena.sparql.engine.binding.Binding;
@@ -23,49 +28,45 @@ import com.hp.hpl.jena.sparql.engine.ite
public class TSVInput {
+    /**
+     * Reads a SPARQL result set written as tab-separated values.
+     * <p>
+     * The first line names the variables, one per column; a leading '?' on a
+     * name is dropped. Every following line is one solution: each field is
+     * handed to a RIOT string tokenizer and the first token it yields is
+     * bound to the variable of that column. A field that yields no token
+     * (for example an empty field) leaves its variable unbound.
+     * <p>
+     * All rows are read before this method returns. The stream is not closed
+     * here; it stays the caller's responsibility.
+     *
+     * @param in the TSV data, read through IO.asBufferedUTF8
+     * @return a result set over the parsed rows
+     * @throws ARQException if reading fails, or if a row carries a value in a
+     *         column beyond those declared by the header line
+     */
public static ResultSet fromTSV(InputStream in) {
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(in);
+        BufferedReader reader = IO.asBufferedUTF8(in);
+        List<Var> vars = new ArrayList<Var>();
List<String> varNames = new ArrayList<String>();
List<Binding> bindings = new ArrayList<Binding>();
-
- // reads the variables
- List<Var> vars = new ArrayList<Var>();
- Token token = null;
- while (tokenizer.getLine() == 1)
- {
- token = tokenizer.next();
-
- if (token.isWord())
- {
- vars.add(Var.alloc(token.asWord()));
- varNames.add(vars.get(vars.size() - 1).getName());
- }
- }
-
- Binding binding = BindingFactory.create();;
- // the first token from the second line is already
- // consumed, hence we have to apply a specific
- // behavior to handle it
- binding.add(vars.get(0), token.asNode());
- for (byte i=1; i<vars.size(); i++)
- {
- binding.add(vars.get(i), tokenizer.next().asNode());
- }
- bindings.add(binding);
-
- // reads the next lines
- while (tokenizer.hasNext())
- {
- binding = BindingFactory.create();
-
- // reads each node from a line
- for (byte i=1; i<vars.size(); i++)
- {
- binding.add(vars.get(i), tokenizer.next().asNode());
- }
-
- bindings.add(binding);
+
+        boolean first = true;
+        try {
+            String line = null;
+            while ( ( line = reader.readLine() ) != null ) {
+                if ( first ) {
+                    // Header line: one variable name per tab-separated field.
+                    StringTokenizer st = new StringTokenizer(line, "\t");
+                    while ( st.hasMoreTokens() ) {
+                        String token = st.nextToken();
+                        if ( token.startsWith("?") )
+                            token = token.substring(1);
+                        Var var = Var.alloc(token);
+                        vars.add(var);
+                        varNames.add(var.getName());
+                    }
+                    first = false;
+                } else {
+                    // Data line. The tabs are returned as tokens too, so the
+                    // column index follows the separators: an empty field
+                    // leaves a gap (unbound variable) instead of shifting the
+                    // remaining values one column to the left.
+                    StringTokenizer st = new StringTokenizer(line, "\t", true);
+                    int i = 0;
+                    Binding binding = BindingFactory.create();
+                    while ( st.hasMoreTokens() ) {
+                        String token = st.nextToken();
+                        if ( token.equals("\t") ) {
+                            i++;
+                            continue;
+                        }
+                        Tokenizer tokenizer = TokenizerFactory.makeTokenizerString(token);
+                        if ( tokenizer.hasNext() ) {
+                            // Report a malformed row explicitly rather than
+                            // failing inside vars.get(i).
+                            if ( i >= vars.size() )
+                                throw new ARQException("TSV row has a value in column " + (i + 1)
+                                        + " but the header declares only " + vars.size()
+                                        + " variable(s): " + line);
+                            Node node = tokenizer.next().asNode();
+                            binding.add(vars.get(i), node);
+                        }
+                    }
+                    bindings.add(binding);
+                }
+            }
+        } catch ( IOException ex ) {
+            throw new ARQException(ex) ;
}
-
+
return new ResultSetStream(varNames, null, new QueryIterPlainWrapper(bindings.iterator()));
}