You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@jena.apache.org by Paolo Castagna <ca...@googlemail.com> on 2011/06/16 09:29:45 UTC
[Fwd: svn commit: r1136300 - /incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java]
FYI
The JIRA instance at Apache is 'linked' with the Subversion commit comments.
If you include the JIRA issue key in your commit comment (for example: JENA-72),
the commit will appear in the Subversion Commits tab of that issue on JIRA.
For example, see the Subversion Commits tab here:
https://issues.apache.org/jira/browse/JENA-72
I find this a very useful way to communicate with others and
I find it useful to go back to an issue and see all the changes
made to close that issue.
However, it requires committers to write a commit comment and add the
JIRA issue key to it somewhere.
Paolo
-------- Original Message --------
Subject: svn commit: r1136300 -
/incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java
Date: Thu, 16 Jun 2011 06:58:10 -0000
From: castagna@apache.org
Reply-To: jena-dev@incubator.apache.org
To: jena-commits@incubator.apache.org
Author: castagna
Date: Thu Jun 16 06:58:10 2011
New Revision: 1136300
URL: http://svn.apache.org/viewvc?rev=1136300&view=rev
Log:
JENA-72 - This now uses StringTokenizer and
TokenizerFactory.makeTokenizerString() to parse TSV files. All tests pass.
Modified:
incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java
Modified:
incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java
URL:
http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java?rev=1136300&r1=1136299&r2=1136300&view=diff
==============================================================================
---
incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java
(original)
+++
incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/resultset/TSVInput.java
Thu Jun 16 06:58:10 2011
@@ -1,14 +1,19 @@
package com.hp.hpl.jena.sparql.resultset;
+import java.io.BufferedReader;
+import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
+import java.util.StringTokenizer;
-import org.openjena.riot.tokens.Token;
+import org.openjena.atlas.io.IO;
import org.openjena.riot.tokens.Tokenizer;
import org.openjena.riot.tokens.TokenizerFactory;
+import com.hp.hpl.jena.graph.Node;
import com.hp.hpl.jena.query.ResultSet;
+import com.hp.hpl.jena.sparql.ARQException;
import com.hp.hpl.jena.sparql.core.Var;
import com.hp.hpl.jena.sparql.engine.ResultSetStream;
import com.hp.hpl.jena.sparql.engine.binding.Binding;
@@ -23,49 +28,45 @@ import com.hp.hpl.jena.sparql.engine.ite
public class TSVInput {
public static ResultSet fromTSV(InputStream in) {
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(in);
+ BufferedReader reader = IO.asBufferedUTF8(in);
+ List<Var> vars = new ArrayList<Var>();
List<String> varNames = new ArrayList<String>();
List<Binding> bindings = new ArrayList<Binding>();
-
- // reads the variables
- List<Var> vars = new ArrayList<Var>();
- Token token = null;
- while (tokenizer.getLine() == 1)
- {
- token = tokenizer.next();
-
- if (token.isWord())
- {
- vars.add(Var.alloc(token.asWord()));
- varNames.add(vars.get(vars.size() - 1).getName());
- }
- }
-
- Binding binding = BindingFactory.create();;
- // the first token from the second line is already
- // consumed, hence we have to apply a specific
- // behavior to handle it
- binding.add(vars.get(0), token.asNode());
- for (byte i=1; i<vars.size(); i++)
- {
- binding.add(vars.get(i), tokenizer.next().asNode());
- }
- bindings.add(binding);
-
- // reads the next lines
- while (tokenizer.hasNext())
- {
- binding = BindingFactory.create();
-
- // reads each node from a line
- for (byte i=1; i<vars.size(); i++)
- {
- binding.add(vars.get(i), tokenizer.next().asNode());
- }
-
- bindings.add(binding);
+
+ boolean first = true;
+ try {
+ String line = null;
+ while ( ( line = reader.readLine() ) != null ) {
+ StringTokenizer st = new StringTokenizer(line, "\t");
+ if ( first ) {
+ while ( st.hasMoreTokens() ) {
+ String token = st.nextToken();
+ if ( token.startsWith("?") )
+ token = token.substring(1);
+ Var var = Var.alloc(token);
+ vars.add(var);
+ varNames.add(var.getName());
+ }
+ first = false;
+ } else {
+ int i = 0;
+ Binding binding = BindingFactory.create();
+ while ( st.hasMoreTokens() ) {
+ String token = st.nextToken();
+ Tokenizer tokenizer =
TokenizerFactory.makeTokenizerString(token);
+ if ( tokenizer.hasNext() ) {
+ Node node = tokenizer.next().asNode();
+ binding.add(vars.get(i), node);
+ i++;
+ }
+ }
+ bindings.add(binding);
+ }
+ }
+ } catch ( IOException ex ) {
+ throw new ARQException(ex) ;
}
-
+
return new ResultSetStream(varNames, null, new
QueryIterPlainWrapper(bindings.iterator()));
}